From eb7ab88e20de170ed05d683526934b0962636374 Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Mon, 18 Mar 2019 10:29:05 +1300 Subject: [PATCH] Issue #2991 Remove cylc.profiling package, cylc-profile-battery, and update documentation --- bin/cylc-help | 2 - bin/cylc-profile-battery | 991 ------------------ doc/src/installation.rst | 14 - etc/cylc-bash-completion | 2 +- etc/profile-experiments/busy-validate.json | 62 -- etc/profile-experiments/busy.json | 61 -- etc/profile-experiments/complex-validate.json | 10 - etc/profile-experiments/complex.json | 10 - etc/profile-experiments/diamond-validate.json | 49 - etc/profile-experiments/diamond.json | 48 - etc/profile-experiments/example | 63 -- etc/profile-experiments/experiment.json | 9 - .../family-trigger-validate.json | 55 - .../hello-world-validate.json | 10 - etc/profile-experiments/hello-world.json | 9 - etc/profile-experiments/lazy-validate.json | 50 - etc/profile-experiments/lazy.json | 49 - .../profile-simulation/suite.rc | 42 - etc/profile-experiments/test.json | 18 - lib/cylc/profiling/__init__.py | 128 --- lib/cylc/profiling/analysis.py | 469 --------- lib/cylc/profiling/git.py | 107 -- lib/cylc/profiling/profile.py | 349 ------ tests/profile-battery/00-compatability.t | 67 -- tests/profile-battery/test_header | 1 - tests/validate/04-builtin-suites.t | 1 - 26 files changed, 1 insertion(+), 2675 deletions(-) delete mode 100755 bin/cylc-profile-battery delete mode 100644 etc/profile-experiments/busy-validate.json delete mode 100644 etc/profile-experiments/busy.json delete mode 100644 etc/profile-experiments/complex-validate.json delete mode 100644 etc/profile-experiments/complex.json delete mode 100644 etc/profile-experiments/diamond-validate.json delete mode 100644 etc/profile-experiments/diamond.json delete mode 100644 etc/profile-experiments/example delete mode 100644 etc/profile-experiments/experiment.json delete mode 100644 etc/profile-experiments/family-trigger-validate.json delete mode 100644 etc/profile-experiments/hello-world-validate.json delete mode 100644 etc/profile-experiments/hello-world.json delete mode 100644 etc/profile-experiments/lazy-validate.json delete mode 100644 etc/profile-experiments/lazy.json delete mode 100644 etc/profile-experiments/profile-simulation/suite.rc delete mode 100644 etc/profile-experiments/test.json delete mode 100644 lib/cylc/profiling/__init__.py delete mode 100644 lib/cylc/profiling/analysis.py delete mode 100644 lib/cylc/profiling/git.py delete mode 100644 lib/cylc/profiling/profile.py delete mode 100755 tests/profile-battery/00-compatability.t delete mode 120000 tests/profile-battery/test_header diff --git a/bin/cylc-help b/bin/cylc-help index 1f28e6aff14..52ecd43cd4c 100755 --- a/bin/cylc-help +++ b/bin/cylc-help @@ -274,7 +274,6 @@ hook_commands['check-triggering'] = ['check-triggering'] admin_commands = {} admin_commands['test-battery'] = ['test-battery'] -admin_commands['profile-battery'] = ['profile-battery'] admin_commands['import-examples'] = ['import-examples'] admin_commands['check-software'] = ['check-software'] admin_commands['make-docs'] = ['make-docs'] @@ -344,7 +343,6 @@ catsum['utility'] = "Cycle arithmetic and templating, etc." comsum = {} # admin comsum['test-battery'] = 'Run a battery of self-diagnosing test suites' -comsum['profile-battery'] = 'Run a battery of profiling tests' comsum['import-examples'] = 'Import example suites your suite run directory' comsum['check-software'] = 'Check required software is installed' comsum['make-docs'] = 'Build the HTML documentation with Sphinx.' diff --git a/bin/cylc-profile-battery b/bin/cylc-profile-battery deleted file mode 100755 index 22ffc2753b2..00000000000 --- a/bin/cylc-profile-battery +++ /dev/null @@ -1,991 +0,0 @@ -#!/usr/bin/env python3 -# THIS FILE IS PART OF THE CYLC SUITE ENGINE. -# Copyright (C) 2008-2019 NIWA & British Crown (Met Office) & Contributors. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -"""Orchestrates experiments to profile the performance of cylc at different -versions.""" - -import glob -import hashlib -import itertools -import json -import optparse -import os -import random -import re -import shutil -import sys -import tempfile -import time - -# Write out floats to one decimal place only. -from json import encoder -encoder.FLOAT_REPR = lambda o: format(o, '.1f') - -import cylc.profiling as prof -from cylc.profiling.analysis import (make_table, print_table, plot_results) -import cylc.profiling.git as git - -RUN_DOC = r"""cylc profile-battery [-e [EXPERIMENT ...]] [-v [VERSION ...]] - -Run profiling experiments against different versions of cylc. A list of -experiments can be specified after the -e flag, if not provided the experiment -"complex" will be chosen. A list of versions to profile against can be -specified after the -v flag, if not provided the current version will be used. - -Experiments are stored in etc/profile-experiments, user experiments can be -stored in .profiling/experiments. Experiments are specified without the file -extension, experiments in .profiling/ will be chosen before those in etc/. - -IMPORTANT: See etc/profile-experiments/example for an experiment template with -further details. - -Versions are any valid git identifiers i.e. tags, branches, commits. To compare -results to different cylc versions either: - * Supply cylc profile-battery with a complete list of the versions you wish - to profile, it will then provide the option to checkout the required - versions automatically. - * Checkout each version manually running cylc profile-battery against only - one version at a time. Once all results have been gathered you can then - run cylc profile-battery with a complete list of versions. - -Profiling will save results to .profiling/results.json where they can be used -for future comparisons. To list profiling results run: - * cylc profile-battery --ls # list all results - * cylc profile-battery --ls -e experiment # list all results for - # experiment "experiment". - * cylc profile-battery --ls --delete -v 6.1.2 # Delete all results for - # version 6.1.2 (prompted). - -If matplotlib and numpy are installed profiling generates plots which are -saved to .profiling/plots or presented in an interactive window using the -i -flag. - -Results are stored along with a checksum for the experiment file. When an -experiment file is changed previous results are maintained, future results will -be stored separately. To copy results from an older version of an experiment -into those from the current one run: - * cylc profile-battery --promote experiment@checksum -NOTE: At present results cannot be analysed without the experiment file so old -results must be "copied" in this way to be re-used. - -The results output contain only a small number of metrics, to see a full list -of results use the --full option. -""" - - -def create_profile_directory(): - """Creates a directory for storing results and user experiments in.""" - profile_dir = os.path.join(prof.CYLC_DIR, prof.PROFILE_DIR_NAME) - os.mkdir(profile_dir) - os.mkdir(os.path.join(profile_dir, prof.PROFILE_PLOT_DIR_NAME)) - os.mkdir(os.path.join(profile_dir, prof.USER_EXPERIMENT_DIR_NAME)) - - -def create_profile_file(): - """Creates file for storing profiling results in.""" - profile_dir = os.path.join(prof.CYLC_DIR, prof.PROFILE_DIR_NAME) - with open(os.path.join(profile_dir, prof.PROFILE_FILE_NAME), - 'w+') as profile_file: - profile_file.write('{}') - - -def parse_args(): - """Parse command line arguments for this script.""" - def multi_arg_callback(option, _, value, parser): - """Allows an unkonwn number of arguments to be passed as an option.""" - assert value is None - value = [] - for arg in parser.rargs: - if arg[0] == '-': - break - value.append(arg) - del parser.rargs[:len(value)] - setattr(parser.values, option.dest, value) - - parser = optparse.OptionParser(RUN_DOC) - parser.add_option('-e', '--experiments', - help='Specify list of experiments to run.', - dest='experiments', callback=multi_arg_callback, - action='callback') - parser.add_option('-v', '--versions', - help='Specify cylc versions to profile. Git tags, ' + - 'branches, commits are all valid.', - dest='versions', callback=multi_arg_callback, - action='callback') - parser.add_option('-i', '--interactive', action='store_true', - help='Open any plots in interactive window rather ' - 'saving them to files.', default=False) - parser.add_option('-p', '--no-plots', action='store_true', default=False, - help='Don\'t generate any plots.') - parser.add_option('--ls', '--list-results', action='store_true', - default=False, help='List all stored results. ' + - 'Experiments and versions to list can be specified ' + - 'using --experiments and --versions.') - parser.add_option('--delete', action='store_true', default=False, - help='Delete stored results (to be used in ' + - 'combination with --list-results).') - parser.add_option('--yes', '-y', action='store_true', default=False, - help='Answer yes to any user input. Will check-out ' - 'cylc versions as required.') - parser.add_option('--full-results', '--full', action='store_true', - default=False, help='Display all gathered metrics.') - parser.add_option('--lobf-order', dest='lobf_order', help='The order (int)' - 'of the line of best fit to be drawn. 0 for no lobf, ' - '1 for linear, 2 for quadratic ect.', default=2, - type='int') - parser.add_option('--promote', type='str', help='Promote results from an ' - 'older version of an experiment to the current version. ' - 'To be used when making non-functional changes to an ' - 'experiment.') - parser.add_option('--test', action='store_true', default=False, - help='For development purposes, run experiment without ' - 'saving results and regardless of any prior runs.') - opts = parser.parse_args()[0] - - # Defaults for experiments and versions if we are not in list mode. - if not (opts.ls or opts.delete): - if not opts.experiments: - opts.experiments = ["complex"] - if not opts.versions: - opts.versions = ["HEAD"] - else: - if not opts.experiments: - opts.experiments = [] - if not opts.versions: - opts.versions = [] - - return opts - - -def get_results(): - """Return data from the results file.""" - if not os.path.exists(os.path.join(prof.CYLC_DIR, prof.PROFILE_DIR_NAME)): - create_profile_directory() - if not os.path.exists(os.path.join(prof.CYLC_DIR, prof.PROFILE_DIR_NAME, - prof.PROFILE_FILE_NAME)): - create_profile_file() - return {} - else: - # Profile file exists, git list of results contained. - profile_file_path = os.path.join(prof.CYLC_DIR, prof.PROFILE_DIR_NAME, - prof.PROFILE_FILE_NAME) - with open(profile_file_path, 'r') as profile_file: - try: - profile_results = json.load(profile_file) - except ValueError as exc: - print(exc) - sys.exit('ERROR: Could not read "%s". Check that it is valid' - ' JSON or delete the file.' % profile_file_path) - return profile_results - - -def get_result_keys(): - """Return a list of (version_id, experiment_id,) tuples.""" - profile_results = get_results() - result_keys = [] - for version_id, experiment_ids in profile_results.items(): - result_keys.extend([(version_id, experiment_id) for experiment_id - in experiment_ids.keys()]) - return result_keys - - -def get_schedule(versions, experiments, test=False): - """Determine which experiments to run with which versions. - - Return: - tuple - (schedule, experiments_to_run) - - schedule (dict) - Dictionary of cylc version ids containing lists - of the experiments to run for each. - - experiments_to_run (set) - Set of (version_id, experiment_id) - tuples of the experiments to run. - """ - experiment_keys = itertools.product( - [version['id'] for version in versions], - [experiment['id'] for experiment in experiments]) - result_keys = get_result_keys() - - # Exclude any previously acquired results so that experiments are not run - # twice. - if test: - # Don't exclude experiments if in "test" mode. - experiments_to_run = set(experiment_keys) - else: - experiments_to_run = set(experiment_keys) - set(result_keys) - - ret = {} - for version_id, experiment_id in experiments_to_run: - if version_id not in ret: - ret[version_id] = [] - for experiment in experiments: - if experiment_id == experiment['id']: - ret[version_id].append(experiment) - break - return ret, set(item[1] for item in experiments_to_run) - - -def get_versions(version_names): - """Produces a list of version objects from a list of cylc version names.""" - versions = [] - for version_name in version_names: - version_id = git.describe(version_name) - if version_id: - versions.append({ - 'name': version_name, - 'id': version_id - }) - else: - sys.exit('ERROR: cylc version "%s" not reccognised' % version_name) - return versions - - -def get_checksum(file_path, chunk_size=4096): - """Returns a hash of a file.""" - hash_ = hashlib.sha256() - with open(file_path, 'rb') as file_: - for chunk in iter(lambda: file_.read(chunk_size), b""): - hash_.update(chunk) - return hash_.hexdigest()[:15] - - -def load_experiment_config(experiment_file): - """Returns a dictionary containing the contents of the experiment file.""" - with open(experiment_file, 'r') as file_: - try: - ret = json.load(file_) - except ValueError as exc: - sys.exit('ERROR: Invalid JSON in experiment file"{0}"\n{1}'.format( - experiment_file, exc)) - - # Prepend CYLC_DIR to suite definition paths if they aren't provided as - # absolute paths. - try: - for run in ret['runs']: - if not os.path.isabs(os.path.expanduser(run['suite dir'])): - run['suite dir'] = os.path.join(prof.CYLC_DIR, - run['suite dir']) - run['suite dir'] = os.path.realpath(run['suite dir']) - except KeyError as exc: - print(exc) - sys.exit('Error: Experiment definition not complete.') - - # Apply defaults. - for run in ret['runs']: - if 'repeats' not in run: - run['repeats'] = 0 - if 'options' not in run: - run['options'] = [] - if 'profile modes' not in ret: - ret['profile modes'] = prof.DEFAULT_PROFILE_MODES - if 'analysis' not in ret: - ret['analysis'] = 'single' - - return ret - - -def install_experiments(experiment_ids, experiments, install_dir, - checkout_required=False): - """Install experiments with the provided ids as necessary.""" - codicil_path = os.path.join(prof.CYLC_DIR, prof.EXPERIMENTS_PATH, - 'profile-simulation', 'suite.rc') - install_sdir = os.path.join(install_dir, 'suites') - os.mkdir(install_sdir) - - install_modes = { - 'copy': shutil.copyfile, - 'symlink': os.symlink - } - - # Determine which suites require installation. - suite_dirs = {} - for experiment_id in experiment_ids: - experiment = None - for exp in experiments: - if exp['id'] == experiment_id: - experiment = exp - break - if not experiment: - raise Exception('Could not find experiment definition.') - append_codicil = ('mode' in experiment['config'] and - experiment['config']['mode'] == 'profile-simulation') - for run in experiment['config']['runs']: - sdir = os.path.realpath(run['suite dir']) - # Is suite within the cylc repository. - in_cylc_repo = ( - sdir.startswith(os.path.realpath(prof.CYLC_DIR)) and not - sdir.startswith(os.path.realpath(os.path.join( - prof.CYLC_DIR, - prof.PROFILE_DIR_NAME)))) - if not append_codicil and not (in_cylc_repo and checkout_required): - # Don't install suite unless: - # - We are in profile-battery mode - # - The suite is in the cylc repo and we need to checkout - # another cylc version - continue - if in_cylc_repo: - install_mode = install_modes['copy'] - else: - install_mode = install_modes['symlink'] - key = (sdir, append_codicil,) - if sdir not in suite_dirs: - new_sdir = os.path.join(install_sdir, str(random.random())[2:]) - os.mkdir(new_sdir) - suite_dirs[key] = {'install dir': new_sdir, 'runs': [run], - 'install fcn': install_mode} - else: - suite_dirs[key]['runs'].append(run) - - # Install suites. - dont_symlink = ['passphrase', 'ssl.cert', 'ssl.pem', 'suite.rc.processed'] - for key in suite_dirs: - sdir, append_codicil = key - install_dir = suite_dirs[key]['install dir'] - install_fcn = suite_dirs[key]['install fcn'] - # Symlink / copy files as appropriate - for filepath in glob.glob(os.path.join(sdir, '*')): - filename = os.path.basename(filepath) - if filename in dont_symlink: - continue - dest = os.path.join(install_dir, filename) - if append_codicil and filename == 'suite.rc': - # Symlink the suite.rc file as suite.rc-orig. - install_fcn(filepath, dest + '-orig') - else: - # Symlink suite files / directories. - install_fcn(filepath, dest) - # Include suite.rc-orig and codicil.rc if in profile-simulation mode. - if append_codicil: - install_fcn(codicil_path, os.path.join(install_dir, 'codicil.rc')) - with open(os.path.join(install_dir, 'suite.rc'), 'a') as suite_rc: - suite_rc.write('#!jinja2\n' - '{% include "suite.rc-orig" %}\n' - '{% include "codicil.rc" %}') - - # Update experiments to installation directories. - for sdir, append_codicil in suite_dirs: - key = (sdir, append_codicil,) - for run in suite_dirs[key]['runs']: - print('installing suite "%s" => "%s"' % ( - sdir, suite_dirs[key]['install dir'])) - run['suite dir'] = suite_dirs[key]['install dir'] - - # Global config sourcing. - os.mkdir(os.path.join(install_sdir, 'globalrc')) - for experiment in experiments: - for run in experiment['config']['runs']: - if 'globalrc' in run: - string = '' - for setting in run['globalrc']: - indent = 0 - setting = re.split(r'[\[\]]+', setting.strip()) - for part in setting[:-1]: # Key hierarchy. - if not part: - continue - string += '%s%s%s%s\n' % ( - ' ' * indent, - '[' * (indent + 1), - part, - ']' * (indent + 1) - ) - indent += 1 - string += '%s%s\n' % (' ' * indent, setting[-1]) - hash_ = hashlib.sha256() - hash_.update(string) - dirname = os.path.join(install_sdir, 'globalrc', - hash_.hexdigest()[:10]) - if not os.path.exists(dirname): - # If an identical globalrc has been written do nothing. - os.mkdir(dirname) - with open(os.path.join(dirname, 'global.rc'), - 'w+') as globalrc_file: - globalrc_file.write(string) - run['globalrc'] = dirname - - -def get_experiments(experiment_names): - """Returns a dictionary of experiment names against experiment ids (which - contain a checksum).""" - experiments = [] - for experiment_name in experiment_names: - file_name = experiment_name + '.json' - # Look for experiment file in the users experiment directory. - file_path = os.path.join(prof.CYLC_DIR, prof.PROFILE_DIR_NAME, - prof.USER_EXPERIMENT_DIR_NAME, file_name) - if not os.path.exists(file_path): - # Look for experiment file in built-in experiment directory. - file_path = os.path.join(prof.CYLC_DIR, prof.EXPERIMENTS_PATH, - file_name) - if not os.path.exists(file_path): - # Could not find experiment file in either path. Exit! - print('ERROR: Could not find experiment file for "%s"' % ( - experiment_name)) - experiments.append({'name': experiment_name, - 'id': 'Invalid', - 'file': None}) - continue - config = load_experiment_config(file_path) - experiments.append({ - 'name': experiment_name, - 'id': '{0}@{1}'.format(experiment_name, get_checksum(file_path)), - 'file': file_path, - 'config': config - }) - return experiments - - -def print_manual_scheme(versions, experiments, all_versions=None): - """Writes a list of bash commands to run in order to perform profing - without automation of checkout out cylc versions.""" - # TODO: Generate from schedule. - if all_versions: - ver = ' '.join(version['id'] for version in all_versions) - else: - ver = ' '.join(version['id'] for version in versions) - exp = ' '.join(experiment['name'] for experiment in experiments) - for version in versions: - print('\t$ git checkout ' + version['id']) - print('\t$ cylc profile-battery --experiments ' + exp) - print('\t$ cylc profile-battery --versions {versions} --experiments ' - '{experiments}'.format(versions=ver, experiments=exp)) - - -def determine_action(schedule, versions, experiments, non_interactive=False): - """Determines whether it is necessary to checkout differnet cylc - version(s). - - Prompts user as to whether they want to use automated - checkout and if so for what. - """ - # Determine which versions need to be checked out. - current_version = git.describe('HEAD') - other_versions = [] - for version_id in schedule: - if version_id != current_version: - for version in versions: - if version_id == version['id']: - other_versions.append(version) - - # Check for potential incompatability with PROFILE_MODE_CYLC. - for experiment in experiments: - if prof.PROFILE_MODE_CYLC in experiment['config']['profile modes']: - # Check suitability of profile-mode cylc with this schedule. - temp = [] - for version_id in schedule: - if not git.is_ancestor_commit(prof.CYLC_PROFILING_COMMIT, - version_id): - for version in versions: - if version['id'] == version_id: - temp.append(version) - if temp and not non_interactive: - # Profile-mode cylc might not be suitible, warn user. - print('WARNING: You are trying to use the "cylc" profile mode ' - 'with versions of cylc which predate the profiling ' - 'module namely:\n' - '\t' + ' '.join(version['name'] for version in temp) + - '\n\nTo profile these versions you will need to back ' - 'port the profiling module as well as some of the memory' - ' checkpointing in the main loop.\n') - usr = None - while usr not in ['y', 'n']: - usr = input('proceed? (y/n): ') - if usr == 'n': - sys.exit('Profiling aborted by user.') - print() - elif temp: - print(('WARNING: You are using profile-mode ' - '"cylc" with older versions of cylc.'), - file=sys.stderr) - - # Prompt user over using automated checkout. - to_checkout = [] - if other_versions and not non_interactive: - manual_versions = [] - automatic_only_versions = [] - for version in other_versions: - if git.is_ancestor_commit(prof.PROFILE_COMMIT, version['id']): - manual_versions.append(version) - else: - automatic_only_versions.append(version) - - print('To perform profiling different cylc versions will need to be ' - 'checked out. I can checkout and profile versions ' - 'automatically.') - print('If using the automatic checkout system ensure that there are ' - 'no un-commited changes before proceeding and do not make ' - 'any changes to the local repository whist the profiling is ' - 'running\n') - - if automatic_only_versions: - print('These versions can only be profiled ' - 'automatically:\n\t{0}'.format( - ' '.join(version['name'] for version in - automatic_only_versions) - )) - if manual_versions: - print('These versions you can profile manually if you ' - 'prefer:\n\t{0}'.format( - ' '.join(version['name'] for version in - manual_versions))) - - print() - - if manual_versions and not automatic_only_versions: - response = None - while response not in ['y', 'n']: - response = input('Do you want to checkout these versions ' - 'automatically? (y/n): ') - if response == 'n': - print('You can perform this profiling manually by doing ' - 'something like:') - print_manual_scheme(manual_versions, experiments, - all_versions=versions) - sys.exit('Profiling aborted by user.') - else: - to_checkout = manual_versions - elif manual_versions and automatic_only_versions: - response = None - while response not in ['some', 'all', 'none']: - response = input( - 'Which versions should I check out:\n\t' - 'Only those which cannot be profiled otherwise (some)\n\t' - 'All versions (all)\n\t' - 'None (none)\n> ') - if response == 'some': - print('The remainder can be profiled by doing something like:') - print_manual_scheme(manual_versions, experiments, - all_versions=versions) - to_checkout = automatic_only_versions - if response == 'none': - print('Some versions can be profiled manually by doing ' - 'something like:') - print_manual_scheme(manual_versions, experiments, - all_versions=manual_versions) - sys.exit('Profiling aborted by user.') - else: - to_checkout = manual_versions + automatic_only_versions - elif automatic_only_versions: - response = None - while response not in ['y', 'n']: - response = input('Do you want to checkout these versions ' - 'automatically? (y/n): ') - if response == 'y': - to_checkout = automatic_only_versions - else: - sys.exit('Profiling aborted by user.') - if other_versions and non_interactive: - to_checkout = other_versions - return to_checkout - - -def update_nested_dictionaries(old, new): - """Merges entries from new into old (overwrites old with new in the event - of a conflict.""" - old = old.copy() - new = new.copy() - for key, value in new.items(): - if key in old: - if isinstance(value, dict): - old[key] = update_nested_dictionaries(old[key], new[key]) - else: - old[key] = value - else: - old[key] = value - return old - - -def append_new_results(results): - """Append new profiling results to results file.""" - profile_file_path = os.path.join(prof.CYLC_DIR, prof.PROFILE_DIR_NAME, - prof.PROFILE_FILE_NAME) - try: - with open(profile_file_path, 'r') as file_: - previous_results = json.load(file_) - except IOError as exc: - if exc.errno == 2: - previous_results = {} - else: - raise - - ret = update_nested_dictionaries(previous_results, results) - os.remove(profile_file_path) - with open(profile_file_path, 'w+') as file_: - json.dump(ret, file_) - - -def delete_results(result_keys, interactive=False): - """Delete results from the results file provided as a list of version_id, - experiment_id tuples.""" - if interactive: - usr = None - while usr not in ['y', 'n']: - usr = input('Delete these results (y/n)? ') - if usr != 'y': - sys.exit(0) - - results = get_results() - - for version_id, experiment_id in result_keys: - try: - del results[version_id][experiment_id] - if not results[version_id]: - del results[version_id] - except KeyError: - pass - - profile_file_path = os.path.join(prof.CYLC_DIR, prof.PROFILE_DIR_NAME, - prof.PROFILE_FILE_NAME) - os.remove(profile_file_path) - with open(profile_file_path, 'w+') as results_file: - json.dump(results, results_file) - - -def install_profiler(): - """Transfer profiling code and resources to a temporary directory to enable - different cylc versions to be checked out.""" - try: - # Temp dir to install files to - tempdir = tempfile.mkdtemp() - print('Installing profiler to:', tempdir) - - shutil.copytree( - os.path.join(prof.CYLC_DIR, 'lib', 'cylc', 'profiling'), - os.path.join(tempdir, 'tempprofiling') - ) - - # Append profiling code to $PATH - sys.path.insert(0, os.path.join(tempdir)) - sys.path.insert(0, os.path.join(tempdir, 'tempprofiling')) - except Exception as exc: - # Slightest hint of trouble => abort. - print(exc) - sys.exit('ERROR: Problem installing profiling.') - return tempdir - - -def run_schedule(schedule, experiments, versions, exps_to_run, - non_interactive=False, test=False): - """Orchestrates profiling the versions/experiments contained in the - schedule. - - Args: - schedule (dict): A dictionary of cylc version_ids containing lists of - experiments to run for each. - experiments (list): List of experiment dicts. - versions (list): List of version dicts. - exps_to_run (set): Set of (version_id, experiment_id) tuples for the - experiments to run. - non_interactive (bool - optional): If True prompting is disabled. - test (bool - optional): If True all experiments will be run - irrespective of any previous results. New results will not be - saved. - - Return: - bool: True if ALL profiling has been successfull. - - """ - # Ask the user which versions to profile. - other_versions = determine_action(schedule, versions, experiments, - non_interactive or test) - - # Install profiler if necessary. - if other_versions: - # Some versions will need to be checked-out. Install the profiling code - # outside the working tree then proceed. - if git.has_changes_to_be_committed(): - sys.exit('Please commit any changes before proceeding.') - profiler_install_dir = install_profiler() - try: - from tempprofiling.profile import profile - from tempprofiling.git import checkout, GitCheckoutError - except ImportError: - shutil.rmtree(profiler_install_dir, ignore_errors=True) - sys.exit('ERROR: Failed to install profiler.') - else: - # No cylc versions need to be checkout-out. - from cylc.profiling.profile import profile - profiler_install_dir = tempfile.mkdtemp() - - # Install experiments as necessary - install_experiments(exps_to_run, experiments, profiler_install_dir, - checkout_required=True if other_versions else False) - - # Run profiling. - results, checkout_count, success = profile(schedule) - - # Delete profiler and experiments if created. - if success: - shutil.rmtree(profiler_install_dir, ignore_errors=True) - - # Append results to results file. - if not test: - append_new_results(results) - - # Return git repo to original location (if changed). - if checkout_count > 0: - try: - if git.has_changes_to_be_committed(): - raise GitCheckoutError() - checkout(r'@{-%d}' % checkout_count, delete_pyc=True) - except GitCheckoutError: - print(('ERROR: Could not checkout git repo to original location. ' - r'\n\t$ git checkout @{-%d}' % checkout_count)) - - # Stop here if profiling was un-successfull. - if not success: - print('ERROR: Some experiments failed to run, no plotting will be ' - 'attempted.') - - return success - - -def run_analysis(experiments, versions, interactive=False, - quick_analysis=True, lobf_order=2, plot=True): - """Runs analysis over the results already acquired. - - Args: - versions (list): List of version dicts. - experiments (list): List of experiment dicts. - interactive (bool - optional): If True then interractive matplotlib - windows will display rather than being rendered to a file. - quick_analysis (bool - optional): If True then only a small set of the - gathered metrics will be output. - lobf_order (int - optional): The polynomial order to be used for - generating the lines of best fit on all plots produced. - plot (bool - optional): If True then plotting will be performed. - - """ - # Get results - with open(os.path.join(prof.CYLC_DIR, - prof.PROFILE_DIR_NAME, - prof.PROFILE_FILE_NAME), 'r') as profile_file: - full_results = json.load(profile_file) - - # Run analysis for each experiment requested. - for experiment in experiments: - plt_dir = False - if not interactive: - plt_dir = os.path.join(prof.CYLC_DIR, - prof.PROFILE_DIR_NAME, - prof.PROFILE_PLOT_DIR_NAME, - experiment['name'] + '-' + - str(int(time.time()))) - os.makedirs(plt_dir) - - # Print a table of results. - print() - print_table( - make_table(full_results, versions, experiment, - quick_analysis=quick_analysis), - transpose=not quick_analysis - ) - print() - - # Plot results. - if not plot: - continue - plot_results(full_results, versions, experiment, plt_dir, - quick_analysis=quick_analysis, lobf_order=lobf_order) - if plt_dir: - print( - f'Results for experiment "{experiment["name"]}" have been ' - f'written out to "{plt_dir}"') - - -def ls(exp_names, ver_names, delete=False): - """List all results for the provided experiment and version names. - - Args: - delete (bool - optional): If true the user is prompted whether to - delete the selected results. - - """ - results = get_results() # Get contents of results file. - include = {} # Dict of all results to list, exp_name: exp_id: [ver_id] - all_versions = [] # List of all version ids contained in 'include' - - def include_result(experiment_name, experiment_id, version_id): - if experiment_name not in include: - include[experiment_name] = {} - if experiment_id not in include[experiment_name]: - include[experiment_name][experiment_id] = [] - include[experiment_name][experiment_id].append(version_id) - if version_id not in all_versions: - all_versions.append(version_id) - - if not exp_names and not ver_names: - # No experiments or versions specified => list all results. - for version_id in results: - for experiment_id in results[version_id]: - experiment_name = experiment_id.split('@')[0] - include_result(experiment_name, experiment_id, version_id) - else: - # List only specified experiments and versions. - version_ids = list(map(git.describe, ver_names)) - experiment_ids = set(name for name in exp_names if '@' in name) - experiment_names = set(exp_names) - experiment_ids - - for version_id in results: - if ver_names and version_id not in version_ids: - continue - for experiment_id in results[version_id]: - experiment_name = experiment_id.split('@')[0] - if (not exp_names or (experiment_name in experiment_names or - experiment_id in experiment_ids)): - include_result(experiment_name, experiment_id, version_id) - - git.order_identifiers_by_date(all_versions) - - experiments = get_experiments(list(include)) - current_experiment_ids = [] - for experiment in experiments: - current_experiment_ids.append(experiment['id']) - - table = [['Experiment Name', 'Experiment ID', 'Version ID'], - [None, None, None]] - for experiment_name in sorted(include): - table.append([experiment_name, None, None]) - for experiment_id in include[experiment_name]: - if experiment_id in current_experiment_ids: - table.append(['', '* ' + experiment_id, None]) - else: - table.append(['', experiment_id, None]) - for version_id in all_versions: - if version_id in include[experiment_name][experiment_id]: - table.append(['', '', version_id]) - - print_table(table) - - if delete: - filtered_keys = [] - for experiment_name in include: - for experiment_id in include[experiment_name]: - for version_id in include[experiment_name][experiment_id]: - filtered_keys.append((version_id, experiment_id,)) - delete_results(filtered_keys, interactive=True) - - -def promote(experiment_id, yes=False): - """Promote any results for the provided experiment version to the current - version.""" - if '@' not in experiment_id: - sys.exit('A version must be supplied to promote an experiment e.g. ' - 'exp@a1b2c3d4e5') - experiment_name, experiment_version = experiment_id.rsplit('@', 1) - - results = get_results() # Get contents of results file. - - cur_exp_id = get_experiments([experiment_name])[0]['id'] - - candidate_versions = [] - target_versions = [] - for version in results: - for exp_id in results[version]: - exp_name, exp_ver = exp_id.rsplit('@', 1) - if exp_name != experiment_name: - continue - if exp_ver == experiment_version: - candidate_versions.append(version) - elif exp_id == cur_exp_id: - target_versions.append(version) - - if not candidate_versions: - sys.exit('There are no results for experiment "{experiment_id}".' - ''.format(experiment_id=experiment_id)) - ls([experiment_name], []) - if target_versions: - candidate_versions = [version for version in candidate_versions if - version not in target_versions] - print() - print('Only the results for cylc versions not already profiled in ' - 'the current experiment version will be promoted.') - git.order_identifiers_by_date(candidate_versions) - - print() - print(('Promote the following results for experiment "{name}" at version ' - '"{candidate}" to the current version "{target}":'.format( - name=experiment_name, - candidate=experiment_version, - target=cur_exp_id.rsplit('@', 1)[1]))) - print('\t', ' '.join(candidate_versions)) - - if not yes: - response = None - while response not in ['y', 'n']: - response = input('Upgrade these versions? (y/n): ') - if yes or response == 'y': - # Promote results. - try: - for version in candidate_versions: - results[version][cur_exp_id] = results[version][experiment_id] - except KeyError as exc: - print(exc) - sys.exit('Unexpected error.') - else: - append_new_results(results) - # Provide option to delete duplicates. - ls([experiment_id], candidate_versions, delete=True) - else: - sys.exit('Aborted, not changes made.') - - -def main(): - """cylc profile-battery""" - opts = parse_args() - - if not prof.IS_GIT_REPO: - print('ERROR: profiling requires cylc to be a git repository.', - file=sys.stderr) - sys.exit(2) - - # Promote mode. - if opts.promote: - promote(opts.promote, opts.yes) - sys.exit(0) - - # If in "list" mode print out results then exit. - if opts.ls or opts.delete: - ls(opts.experiments, opts.versions, delete=opts.delete) - sys.exit(0) - - # Generate list of requested experiments and versions. - experiments = get_experiments(opts.experiments) - versions = get_versions(opts.versions) - - # Order versions. - git.order_versions_by_date(versions) - - # Fail in the event that an experiment file cannot be found. - if not all(experiment['file'] for experiment in experiments): - sys.exit('Experiment file(s) could not be loaded, profiling aborted.') - - # Run experiments as necessary. - schedule, exps_to_run = get_schedule(versions, experiments, test=opts.test) - if schedule: - if not run_schedule(schedule, experiments, versions, exps_to_run, - opts.yes, opts.test): - sys.exit('Profiling failed.') - - # Don't run analysis if in "test" mode. - if opts.test: - sys.exit(0) - - # Run analysis - run_analysis(experiments, versions, opts.interactive, - not opts.full_results, opts.lobf_order, - plot=not opts.no_plots) - - -if __name__ == '__main__': - main() diff --git a/doc/src/installation.rst b/doc/src/installation.rst index 914dc29f80b..aa387012a61 100644 --- a/doc/src/installation.rst +++ b/doc/src/installation.rst @@ -267,17 +267,3 @@ Code Style Tests lib/Jinja2Filters/*.py \ lib/parsec/*.py \ $(grep -l '#!.*\' bin/*) - -Performance Tests - A system for measuring the performance of Cylc as measured against reference - suites. - - Location - * ``etc/profile-experiments/`` - * ``.profiling/experiments`` - Configuration - ``.profiling`` - Execution - .. code-block:: console - - $ cylc profile-battery -e EXPERIMENT .. -v VERSION .. diff --git a/etc/cylc-bash-completion b/etc/cylc-bash-completion index dca886f5072..bb382389531 100644 --- a/etc/cylc-bash-completion +++ b/etc/cylc-bash-completion @@ -38,7 +38,7 @@ _cylc() { cur="${COMP_WORDS[COMP_CWORD]}" sec="${COMP_WORDS[1]}" opts="$(cylc print -x -y 2>/dev/null)" - suite_cmds="broadcast|bcast|cat-log|log|cat-state|check-versions|checkpoint|diff|compare|documentation|browse|dump|edit|ext-trigger|external-trigger|get-directory|get-suite-config|get-config|get-suite-version|get-cylc-version|graph|graph-diff|hold|import-examples|insert|jobscript|kill|list|ls|ls-checkpoints|monitor|nudge|ping|poll|print|profile-battery|register|release|unhold|reload|remove|report-timings|reset|restart|run|start|scan|scp-transfer|search|grep|set-verbosity|show|spawn|stop|shutdown|submit|single|suite-state|test-battery|trigger|validate|view|warrenty" + suite_cmds="broadcast|bcast|cat-log|log|cat-state|check-versions|checkpoint|diff|compare|documentation|browse|dump|edit|ext-trigger|external-trigger|get-directory|get-suite-config|get-config|get-suite-version|get-cylc-version|graph|graph-diff|hold|import-examples|insert|jobscript|kill|list|ls|ls-checkpoints|monitor|nudge|ping|poll|print|register|release|unhold|reload|remove|report-timings|reset|restart|run|start|scan|scp-transfer|search|grep|set-verbosity|show|spawn|stop|shutdown|submit|single|suite-state|test-battery|trigger|validate|view|warrenty" if [[ ${COMP_CWORD} -eq 1 ]]; then diff --git a/etc/profile-experiments/busy-validate.json b/etc/profile-experiments/busy-validate.json deleted file mode 100644 index 807e1366d57..00000000000 --- a/etc/profile-experiments/busy-validate.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "runs": [ - { - "name": "1", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=1"], - "repeats": 1 - }, - { - "name": "2", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=2"], - "repeats": 1 - }, - { - "name": "5", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=5"], - "repeats": 0 - }, - { - "name": "10", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=10"], - "repeats": 0 - }, - { - "name": "25", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=25"], - "repeats": 0 - }, - { - "name": "50", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=50"], - "repeats": 0 - }, - { - "name": "75", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=75"], - "repeats": 0 - }, - { - "name": "100", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=100"], - "repeats": 0 - }, - { - "name": "200", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=200"], - "repeats": 0 - } - ], - "profile modes": ["time"], - "analysis": "scale", - "mode": "validate", - "x-axis": "Chains of parallel tasks (X5 tasks per chain X2 cycles)" -} diff --git a/etc/profile-experiments/busy.json b/etc/profile-experiments/busy.json deleted file mode 100644 index 95296ce35e0..00000000000 --- a/etc/profile-experiments/busy.json +++ /dev/null @@ -1,61 +0,0 @@ -{ - "runs": [ - { - "name": "1", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=1", "batch_system=at"], - "repeats": 1 - }, - { - "name": "2", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=2", "batch_system=at"], - "repeats": 1 - }, - { - "name": "5", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=5", "batch_system=at"], - "repeats": 0 - }, - { - "name": "10", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=10", "batch_system=at"], - "repeats": 0 - }, - { - "name": "25", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=25", "batch_system=at"], - "repeats": 0 - }, - { - "name": "50", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=50", "batch_system=at"], - "repeats": 0 - }, - { - "name": "75", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=75", "batch_system=at"], - "repeats": 0 - }, - { - "name": "100", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=100", "batch_system=at"], - "repeats": 0 - }, - { - "name": "200", - "suite dir": "etc/dev-suites/chains", - "options": ["tasks_per_chain=5", "chains=200", "batch_system=at"], - "repeats": 0 - } - ], - "profile modes": ["time"], - "x-axis": "Chains of parallel tasks (X5 tasks per chain X2 cycles)", - "analysis": "scale" -} diff --git a/etc/profile-experiments/complex-validate.json b/etc/profile-experiments/complex-validate.json deleted file mode 100644 index 57bd947f836..00000000000 --- a/etc/profile-experiments/complex-validate.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "runs": [ - { - "name": "complex suite", - "suite dir": "etc/dev-suites/complex" - } - ], - "profile modes": ["time"], - "mode": "validate" -} diff --git a/etc/profile-experiments/complex.json b/etc/profile-experiments/complex.json deleted file mode 100644 index 9f4d547b7a6..00000000000 --- a/etc/profile-experiments/complex.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "runs": [ - { - "name": "complex suite", - "suite dir": "etc/dev-suites/complex", - "options": ["batch_system=at", "sleep_time=1"] - } - ], - "profile modes": ["time"] -} diff --git a/etc/profile-experiments/diamond-validate.json b/etc/profile-experiments/diamond-validate.json deleted file mode 100644 index 5d9c493fb13..00000000000 --- a/etc/profile-experiments/diamond-validate.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "runs": [ - { - "name": "10", - "suite dir": "etc/dev-suites/diamond", - "options": ["cycles=5", "tasks=10"], - "repeats": 2 - }, - { - "name": "50", - "suite dir": "etc/dev-suites/diamond", - "options": ["cycles=5", "tasks=50"], - "repeats": 1 - }, - { - "name": "100", - "suite dir": "etc/dev-suites/diamond", - "options": ["cycles=5", "tasks=100"], - "repeats": 0 - }, - { - "name": "250", - "suite dir": "etc/dev-suites/diamond", - "options": ["cycles=5", "tasks=250"], - "repeats": 0 - }, - { - "name": "500", - "suite dir": "etc/dev-suites/diamond", - "options": ["cycles=5", "tasks=500"], - "repeats": 0 - }, - { - "name": "750", - "suite dir": "etc/dev-suites/diamond", - "options": ["cycles=5", "tasks=750"], - "repeats": 0 - }, - { - "name": "1000", - "suite dir": "etc/dev-suites/diamond", - "options": ["cycles=5", "tasks=1000"], - "repeats": 0 - } - ], - "profile modes": ["time"], - "mode": "validate", - "analysis": "scale" -} diff --git a/etc/profile-experiments/diamond.json b/etc/profile-experiments/diamond.json deleted file mode 100644 index 1d569077436..00000000000 --- a/etc/profile-experiments/diamond.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "runs": [ - { - "name": "10", - "suite dir": "etc/dev-suites/diamond", - "options": ["cycles=5", "tasks=10", "batch_system=at"], - "repeats": 2 - }, - { - "name": "50", - "suite dir": "etc/dev-suites/diamond", - "options": ["cycles=5", "tasks=50", "batch_system=at"], - "repeats": 1 - }, - { - "name": "100", - "suite dir": "etc/dev-suites/diamond", - "options": ["cycles=5", "tasks=100", "batch_system=at"], - "repeats": 0 - }, - { - "name": "250", - "suite dir": "etc/dev-suites/diamond", - "options": ["cycles=5", "tasks=250", "batch_system=at"], - "repeats": 0 - }, - { - "name": "500", - "suite dir": "etc/dev-suites/diamond", - "options": ["cycles=5", "tasks=500", "batch_system=at"], - "repeats": 0 - }, - { - "name": "750", - "suite dir": "etc/dev-suites/diamond", - "options": ["cycles=5", "tasks=750", "batch_system=at"], - "repeats": 0 - }, - { - "name": "1000", - "suite dir": "etc/dev-suites/diamond", - "options": ["cycles=5", "tasks=1000", "batch_system=at"], - "repeats": 0 - } - ], - "profile modes": ["time"], - "analysis": "scale" -} diff --git a/etc/profile-experiments/example b/etc/profile-experiments/example deleted file mode 100644 index 09ef6cd528b..00000000000 --- a/etc/profile-experiments/example +++ /dev/null @@ -1,63 +0,0 @@ -# Example of experiment definition (NOTE: json does not support comments) - -{ - # A list of runs where a run represents a particular profiling - # configuration. Specify one or more runs as dictionaries. - "runs": [ - { - # A name for this run, results will be stored under this name. - # If using "analysis = scale" this name should be an integer value - # provided as a string (e.g. name = "12") - "name": "hello world suite", - - # Path to the suite directory. Relative paths will be prefixed - # the path to the cylc working copy. - "suite dir": "~/roses/hello_world", - - # For providing jinja2 variables to your suite.rc - # --- optional, default=[] --- - # NOTE: A convention upheld by profile-battery mode for jinja2 - # variables is: - # - `batch_system`: Used for [task][job]batch system = _ - # - `sleep_time`: User for [task]script = sleep _ - "options": ["setting=value", "foo=bar"], - - # Config items for the global.rc file this suite is run with. - # --- optional, default=[] --- - "globalrc": ['[cylc][events]mail events=timeout'], - - # Number of REPEATS to perform, if zero the suite will run once, - # if one it will run twice! - # --- optional, default=0 --- - "repeats": 1 - } - ], - - # System(s) to profile code: - # - `time`: Profiles the code using /usr/bin/time. - # - `cylc`: Runs the code using the --profile option (not available in all - # cylc versions. - # --- optional, default=['time'] --- - "profile modes": ["time", "cylc"], - - # Type of analysis to perform: - # - `single`: For non-scaling experiments. - # - `scale`: For scaling suites. - # --- optional, default='single' --- - "analysis": "single", - - # cylc run mode: - # - `validate`: Profiles with `cylc validate ` rather than - # `cylc run `. - # - `profile-simulation`: Manually overwrites all script to sleep 1 removes - # any pre/post script, sets host to localhost and job-submission method - # to background then runs `cylc run --mode=live`. - # - ``: Profiles `cylc run` with `--mode=`. - # --- optional, default=None (equivilent to `live`) --- - "mode": "profile-simulation", - - # For experiments using analysis=scale, the label for the x-axis of - # produced plots. - # --- optional, default='Tasks' --- - "x-axis": "X axis title" -} diff --git a/etc/profile-experiments/experiment.json b/etc/profile-experiments/experiment.json deleted file mode 100644 index 011a239c7f3..00000000000 --- a/etc/profile-experiments/experiment.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "runs": [ - { - "name": "hello world suite", - "suite dir": "~/roses/hello_world" - } - ], - "profile modes": ["time", "cylc"] -} diff --git a/etc/profile-experiments/family-trigger-validate.json b/etc/profile-experiments/family-trigger-validate.json deleted file mode 100644 index c24aa850c62..00000000000 --- a/etc/profile-experiments/family-trigger-validate.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "runs": [ - { - "name": "10", - "suite dir": "etc/dev-suites/family-triggers", - "options": ["batch_system=at", "no_members=10"], - "repeats": 3 - }, - { - "name": "25", - "suite dir": "etc/dev-suites/family-triggers", - "options": ["batch_system=at", "no_members=20"], - "repeats": 2 - }, - { - "name": "50", - "suite dir": "etc/dev-suites/family-triggers", - "options": ["batch_system=at", "no_members=50"], - "repeats": 1 - }, - { - "name": "75", - "suite dir": "etc/dev-suites/family-triggers", - "options": ["batch_system=at", "no_members=75"], - "repeats": 0 - }, - { - "name": "100", - "suite dir": "etc/dev-suites/family-triggers", - "options": ["batch_system=at", "no_members=100"], - "repeats": 0 - }, - { - "name": "150", - "suite dir": "etc/dev-suites/family-triggers", - "options": ["batch_system=at", "no_members=150"], - "repeats": 0 - }, - { - "name": "200", - "suite dir": "etc/dev-suites/family-triggers", - "options": ["batch_system=at", "no_members=200"], - "repeats": 0 - }, - { - "name": "250", - "suite dir": "etc/dev-suites/family-triggers", - "options": ["batch_system=at", "no_members=250"], - "repeats": 0 - } - ], - "analysis": "scale", - "profile modes": ["time" ], - "mode": "validate" -} diff --git a/etc/profile-experiments/hello-world-validate.json b/etc/profile-experiments/hello-world-validate.json deleted file mode 100644 index f5ae2ac96b0..00000000000 --- a/etc/profile-experiments/hello-world-validate.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "runs": [ - { - "name": "run", - "suite dir": "etc/dev-suites/hello-world", - "repeats": 10 - } - ], - "mode": "validate" -} diff --git a/etc/profile-experiments/hello-world.json b/etc/profile-experiments/hello-world.json deleted file mode 100644 index e956b8eb79f..00000000000 --- a/etc/profile-experiments/hello-world.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "runs": [ - { - "name": "run", - "suite dir": "etc/dev-suites/hello-world", - "repeats": 10 - } - ] -} diff --git a/etc/profile-experiments/lazy-validate.json b/etc/profile-experiments/lazy-validate.json deleted file mode 100644 index 38d2a2b14a1..00000000000 --- a/etc/profile-experiments/lazy-validate.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "runs": [ - { - "name": "10", - "suite dir": "etc/dev-suites/chains", - "options": ["chains=1", "tasks_per_chain=10"], - "repeats": 0 - }, - { - "name": "50", - "suite dir": "etc/dev-suites/chains", - "options": ["chains=1", "tasks_per_chain=50"], - "repeats": 0 - }, - { - "name": "100", - "suite dir": "etc/dev-suites/chains", - "options": ["chains=1", "tasks_per_chain=100"], - "repeats": 0 - }, - { - "name": "250", - "suite dir": "etc/dev-suites/chains", - "options": ["chains=1", "tasks_per_chain=250"], - "repeats": 0 - }, - { - "name": "500", - "suite dir": "etc/dev-suites/chains", - "options": ["chains=1", "tasks_per_chain=500"], - "repeats": 0 - }, - { - "name": "750", - "suite dir": "etc/dev-suites/chains", - "options": ["chains=1", "tasks_per_chain=750"], - "repeats": 0 - }, - { - "name": "1000", - "suite dir": "etc/dev-suites/chains", - "options": ["chains=1", "tasks_per_chain=1000"], - "repeats": 0 - } - ], - "profile modes": ["time"], - "analysis": "scale", - "mode": "validate", - "x-axis": "Sequential Tasks (X3 cycles)" -} diff --git a/etc/profile-experiments/lazy.json b/etc/profile-experiments/lazy.json deleted file mode 100644 index 1a7b224a876..00000000000 --- a/etc/profile-experiments/lazy.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "runs": [ - { - "name": "10", - "suite dir": "etc/dev-suites/chains", - "options": ["chains=1", "tasks_per_chain=10", "batch_system=at"], - "repeats": 0 - }, - { - "name": "50", - "suite dir": "etc/dev-suites/chains", - "options": ["chains=1", "tasks_per_chain=50", "batch_system=at"], - "repeats": 0 - }, - { - "name": "100", - "suite dir": "etc/dev-suites/chains", - "options": ["chains=1", "tasks_per_chain=100", "batch_system=at"], - "repeats": 0 - }, - { - "name": "250", - "suite dir": "etc/dev-suites/chains", - "options": ["chains=1", "tasks_per_chain=250", "batch_system=at"], - "repeats": 0 - }, - { - "name": "500", - "suite dir": "etc/dev-suites/chains", - "options": ["chains=1", "tasks_per_chain=500", "batch_system=at"], - "repeats": 0 - }, - { - "name": "750", - "suite dir": "etc/dev-suites/chains", - "options": ["chains=1", "tasks_per_chain=750", "batch_system=at"], - "repeats": 0 - }, - { - "name": "1000", - "suite dir": "etc/dev-suites/chains", - "options": ["chains=1", "tasks_per_chain=1000", "batch_system=at"], - "repeats": 0 - } - ], - "profile modes": ["time"], - "analysis": "scale", - "x-axis": "Sequential Tasks (X2 cycles)" -} diff --git a/etc/profile-experiments/profile-simulation/suite.rc b/etc/profile-experiments/profile-simulation/suite.rc deleted file mode 100644 index f27ddcf6271..00000000000 --- a/etc/profile-experiments/profile-simulation/suite.rc +++ /dev/null @@ -1,42 +0,0 @@ -#!jinja2 - -# This config get appended to the suite config when profiling with -# mode=profile-simulation where namespaces is a list of all tasks present in -# the suite config. -# -# The jinja2 variables sleep_time and batch_system can be provided in the -# options section of an experiment run (e.g. options=["sleep_time=1"]). -# -# The jinja2 variable cylc_compat_mode is provided automatically and contains -# the major version number of the cylc version which will run the suite. - -{% if not sleep_time is defined %} - {% set sleep_time = '1' %} -{% endif %} -{% if not batch_system is defined %} - {% set batch_system = 'background' %} -{% endif %} -{% if namespaces is string %} - {% set namespaces = [namespaces] %} -{% endif %} - -# The runtime to overwrite. -[runtime] -{% for namespace in namespaces %} - [[{{namespace}}]] - {% if cylc_compat_mode is defined and cylc_compat_mode == '6' %} - pre-command scripting = - command scripting = sleep {{sleep_time}} - post-command scripting = - [[[job submission]]] - method = {{batch_system}} - {% else %} - pre-script = - script = sleep {{sleep_time}} - post-script = - [[[job]]] - batch system = {{batch_system}} - {% endif %} - [[[remote]]] - host = localhost -{% endfor %} diff --git a/etc/profile-experiments/test.json b/etc/profile-experiments/test.json deleted file mode 100644 index 2625ddb0b7e..00000000000 --- a/etc/profile-experiments/test.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "runs": [ - { - "name": "chains", - "suite dir": "etc/dev-suites/chains", - "repeats": 1, - "options": ["batch_system=at", "chains=3", "tasks_per_chain=1"] - }, - { - "name": "diamond", - "suite dir": "etc/dev-suites/diamond", - "options": ["tasks=3", "cycles=1"] - } - ], - "profile modes": ["time", "cylc"], - "analysis": "single", - "mode": "profile-simulation" -} diff --git a/lib/cylc/profiling/__init__.py b/lib/cylc/profiling/__init__.py deleted file mode 100644 index 10e5143efb9..00000000000 --- a/lib/cylc/profiling/__init__.py +++ /dev/null @@ -1,128 +0,0 @@ -# THIS FILE IS PART OF THE CYLC SUITE ENGINE. -# Copyright (C) 2008-2019 NIWA & British Crown (Met Office) & Contributors. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -"""Module used for profiling different cylc versions.""" - -import os -import re -from subprocess import Popen, PIPE -import sys - -from .git import is_git_repo - - -def get_cylc_directory(): - """Returns the location of cylc's working copy.""" - ver_str = Popen( - ['cylc', 'version', '--long'], - stdout=PIPE, stdin=open(os.devnull)).communicate()[0].decode() - try: - return os.path.realpath(re.search(r'\((.*)\)', ver_str).groups()[0]) - except IndexError: - sys.exit('Could not locate local git repository for cylc.') - - -# Ensure that the cylc directory is a git repository. -CYLC_DIR = get_cylc_directory() -os.chdir(CYLC_DIR) -IS_GIT_REPO = is_git_repo() - -# Files and directories -PROFILE_DIR_NAME = '.profiling' # Path to profiling directory. -PROFILE_FILE_NAME = 'results.json' # Path to profiling results file -PROFILE_PLOT_DIR_NAME = 'plots' # Path to default plotting directory. -USER_EXPERIMENT_DIR_NAME = 'experiments' # Path to user defined experiments. -EXPERIMENTS_PATH = os.path.join('etc', 'profile-experiments' - ) # Path to built-in experiments. - -# Ancestor commit for cylc profile-battery -PROFILE_COMMIT = '0f5a7999ba9c93174d846a6679db4ce413388df7' - -# Ancestor commit for analysis-compatible cylc (run|validate) --profile -CYLC_PROFILING_COMMIT = '016e6a97be16eaf1a33ea19398a1ade09f86719e' - -# Profiling config. -PROFILE_MODE_TIME = 'PROFILE_MODE_TIME' -PROFILE_MODE_CYLC = 'PROFILE_MODE_CYLC' -PROFILE_MODES = {'time': PROFILE_MODE_TIME, - 'cylc': PROFILE_MODE_CYLC} - -# Profile file suffixes. -PROFILE_FILES = { - 'cmd-out': '', - 'cmd-err': '-cmd.err', - 'time-err': '-time.err', - 'startup': '-startup' -} - - -# ------------- REGEXES --------------- -# Matches the summary line from the cylc --profile output. -SUMMARY_LINE_REGEX = re.compile( - r'([\d]+) function calls \(([\d]+) primitive' - r' calls\) in ([\d.]+)(?: CPU)? seconds') -# Matches the memory checkpoints in the cylc --profile output -MEMORY_LINE_REGEX = re.compile( - r'PROFILE: Memory: ([\d]+) KiB: ([\w.]+): (.*)') -# Matches main-loop memory checkpoints in cylc --profile output. -LOOP_MEMORY_LINE_REGEX = re.compile( - r'(?:loop #|end main loop \(total loops )([\d]+)(?:: |\): )(.*)') -# Matches the sleep function line in cylc --profile output. -SLEEP_FUNCTION_REGEX = re.compile( - r'([\d.]+)[\s]+[\d.]+[\s]+\{built-in method time.sleep\}') -# The string prefixing the suite-startup timestamp (unix time). -SUITE_STARTUP_STRING = 'SUITE STARTUP: ' - - -# -------------- METRICS --------------- -METRIC_TITLE = 0 # For display purposes. -METRIC_UNIT = 1 # For display purposes. -METRIC_FILENAME = 2 # For output plots (no extension). -METRIC_FIELDS = 3 # Fields metrics can be derived from in order of preference. -METRICS = { # Dict of all metrics measured by profile-battery. - '001': ('Elapsed Time', 's', 'elapsed-time', [ - 'real', 'Elapsed (wall clock) time (h:mm:ss or m:ss)', - 'cpu time'],), - '002': ('CPU Time - Total', 's', 'cpu-time', ['total cpu time'],), - '003': ('CPU Time - User', 's', 'user-time', [ - 'user', 'User time (seconds)'],), - '004': ('CPU Time - System', 's', 'system-time', [ - 'sys', 'System time (seconds)'],), - '005': ('Max Memory', 'kb', 'memory', [ - 'maximum resident set size', 'Maximum resident set size (kbytes)', - 'mxmem'],), - '006': ('File System - Inputs', None, 'file-ins', [ - 'block input operations', 'File system inputs'],), - '007': ('File System - Outputs', None, 'file-outs', [ - 'block output operations', 'File system outputs'],), - '008': ('Startup Time', 's', 'startup-time', ['startup time'],), - '009': ('Number Of Main Loop Iterations', None, 'loop-count', [ - 'loop count'],), - '010': ('Average Main Loop Iteration Time', 's', 'loop-time', [ - 'avg loop time'],), - '011': ('Elapsed Time - time.sleep()', 's', 'awake-time', [ - 'awake cpu time'],) -} -# Metrics used if --full is not set. -QUICK_ANALYSIS_METRICS = set(['001', '002', '005']) -# Reverse lookup of METRICS, dict of fields stored with their metric codes. -METRICS_BY_FIELD = {} -for metric in METRICS: - for field in METRICS[metric][METRIC_FIELDS]: - METRICS_BY_FIELD[field] = metric - - -# The profile mode(s) to use if un-specified. -DEFAULT_PROFILE_MODES = ['time'] diff --git a/lib/cylc/profiling/analysis.py b/lib/cylc/profiling/analysis.py deleted file mode 100644 index 5062a3904ee..00000000000 --- a/lib/cylc/profiling/analysis.py +++ /dev/null @@ -1,469 +0,0 @@ -# THIS FILE IS PART OF THE CYLC SUITE ENGINE. -# Copyright (C) 2008-2019 NIWA & British Crown (Met Office) & Contributors. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -"""Module for performing analysis on profiling results and generating plots.""" - -import os -import re -import sys - -# Import modules required for plotting if available. -try: - import numpy - import warnings - warnings.simplefilter('ignore', numpy.RankWarning) - import matplotlib.cm as colour_map - import matplotlib.pyplot as plt - CAN_PLOT = True -except (ImportError, RuntimeError): - CAN_PLOT = False - -from cylc.wallclock import get_unix_time_from_time_string - -from . import (PROFILE_MODE_TIME, PROFILE_MODE_CYLC, SUMMARY_LINE_REGEX, - MEMORY_LINE_REGEX, LOOP_MEMORY_LINE_REGEX, SLEEP_FUNCTION_REGEX, - SUITE_STARTUP_STRING, PROFILE_MODES, PROFILE_FILES, METRICS, - METRIC_TITLE, METRIC_UNIT, METRIC_FILENAME, METRIC_FIELDS, - QUICK_ANALYSIS_METRICS) -from .git import (order_versions_by_date, describe) - - -def mean(data): - """Return the mean average of a list of numbers.""" - return sum(data) / float(len(data)) - - -def remove_profile_from_versions(versions): - """Handles any versions with "-profile" in the version_name. - - Removes -profile* from the version name then sorts the versions by date - (using the new version names). - """ - ret = list(versions) - flag = False - temp = {} - for version in ret: - try: - # Remove -profile from version_name if present. - ind = version['name'].index('-profile') - version['name'] = version['name'][:ind] - version_id = describe(version['name']) - # Temporally change the version_id to match the version_name. - if version_id: - temp[version['name']] = version['id'] - version['id'] = version_id - except ValueError: - continue - else: - flag = True - if flag: - # Sort versions by date. - order_versions_by_date(ret) - # Revert version_ids. - for version in ret: - if version['name'] in temp: - version['id'] = temp[version['name']] - return ret - # No -profile versions, return the original list. - return versions - - -def extract_results(result_file_dict, exp): - """Extract results from the result files output by each run.""" - validate_mode = exp.get('validate_mode', False) - profile_modes = [PROFILE_MODES[mode] for mode in exp['profile modes']] - - results = {} - data = {} - for run_name, result_files in result_file_dict.items(): - data[run_name] = [] - for result_file in result_files: - profiling_results = {} - if PROFILE_MODE_TIME in profile_modes: - profiling_results.update(process_time_file( - result_file + PROFILE_FILES['time-err'])) - if PROFILE_MODE_CYLC in profile_modes: - suite_start_time = None - if not validate_mode: - suite_start_time = get_startup_time( - result_file + PROFILE_FILES['startup']) - profiling_results.update(process_out_file( - result_file + PROFILE_FILES['cmd-out'], suite_start_time, - validate_mode)) - data[run_name].append(profiling_results) - - results = process_results(data) - return results - - -def get_startup_time(file_name): - """Return the value of the "SUITE STARTUP" entry as a string.""" - with open(file_name, 'r') as startup_file: - return re.search( - 'SUITE STARTUP: (.*)', startup_file.read()).groups()[0] - - -def process_time_file(file_name): - """Extracts results from a result file generated using the /usr/bin/time - profiler.""" - with open(file_name, 'r') as time_file: - ret = {} - for line in time_file: - try: - field, value = line.strip().rsplit(': ', 1) - except ValueError: - print('ERROR: Could not parse line "%s"' % line.strip()) - continue - try: # Try to cast as integer. - ret[field] = int(value) - except ValueError: - try: # Try to cast as float. - ret[field] = float(value) - except ValueError: - if value.endswith('%'): # Remove trailing % symbol - try: # Try to cast as integer. - ret[field] = int(value[:-1]) - except ValueError: # Try to cast as float. - ret[field] = float(value[:-1]) - elif ':' in value: # Is a time of form h:m:s or m:s - seconds = 0. - increment = 1. - for time_field in reversed(value.split(':')): - seconds += float(time_field) * increment - increment *= 60 - ret[field] = seconds - else: # Cannot parse. - if 'Command being timed' not in line: - print('ERROR: Could not parse value "%s"' % line) - ret[field] = value - if sys.platform == 'darwin': # MacOS - ret['total cpu time'] = (ret['user'] + ret['sys']) - else: # Assume Linux - ret['total cpu time'] = (ret['User time (seconds)'] + - ret['System time (seconds)']) - return ret - - -def process_out_file(file_name, suite_start_time, validate=False): - """Extract data from the out log file.""" - if not os.path.exists(file_name): - sys.exit('No file with path {0}'.format(file_name)) - with open(file_name, 'r') as out_file: - ret = {} - lines = out_file.readlines() - - # Get start time. - if lines[0].startswith(SUITE_STARTUP_STRING): - ret['suite start time'] = float( - lines[0][len(SUITE_STARTUP_STRING):]) - - # Scan through log entries. - ret['memory'] = [] - loop_mem_entries = [] - for line in lines: - # Profile summary. - match = SUMMARY_LINE_REGEX.search(line) - if match: - ret['function calls'] = int(match.groups()[0]) - ret['primitive function calls'] = int(match.groups()[1]) - ret['cpu time'] = float(match.groups()[2]) - continue - - # Memory info. - match = MEMORY_LINE_REGEX.search(line) - if match: - memory, module, checkpoint = tuple(match.groups()) - ret['memory'].append((module, checkpoint, int(memory),)) - - # Main loop memory info. - if not validate: - match = LOOP_MEMORY_LINE_REGEX.search(checkpoint) - if match: - loop_no, time_str = match.groups() - loop_mem_entries.append(( - int(loop_no), - int(get_unix_time_from_time_string(time_str)), - )) - continue - - # Sleep time. - match = SLEEP_FUNCTION_REGEX.search(line) - if match: - ret['sleep time'] = float(match.groups()[0]) - continue - - # Number of loops. - if not validate: - ret['loop count'] = loop_mem_entries[-1][0] - ret['avg loop time'] = (float(loop_mem_entries[-1][1] - - loop_mem_entries[0][1]) / - loop_mem_entries[-1][0]) - - # Maximum memory usage. - ret['mxmem'] = max(entry[2] for entry in ret['memory']) - - # Startup time (time from running cmd to reaching the end of the first - # loop). - if not validate: - ret['startup time'] = (loop_mem_entries[0][1] - - round(float(suite_start_time), 1)) - - # Awake CPU time. - if not validate: - ret['awake cpu time'] = (ret['cpu time'] - ret['sleep time']) - - return ret - - -def process_results(results): - """Average over results for each run.""" - processed_results = {} - all_metrics = set(METRICS.keys()) - for run_name, run in results.items(): - processed_results[run_name] = {} - this_result = dict((metric, []) for metric in all_metrics) - for result in run: - for metric in all_metrics: - for field in METRICS[metric][METRIC_FIELDS]: - if field in result: - this_result[metric].append(result[field]) - all_metrics = all_metrics & set(this_result.keys()) - for metric in all_metrics: - if this_result[metric]: - processed_results[run_name][metric] = mean(this_result[metric]) - for metric in set(METRICS.keys()) - all_metrics: - for run_name, run in processed_results.items(): - del run[metric] - return processed_results - - -def get_metrics_for_experiment(experiment, results, quick_analysis=False): - """Return a set of metric keys present in the results for experiment - - If a metric is missing from one result it is skipped. - - """ - metrics = set([]) - for version_id in results: - if experiment['id'] in results[version_id]: - for run in results[version_id][experiment['id']].values(): - if metrics: - metrics = metrics & set(run.keys()) - else: - metrics = set(run.keys()) - if quick_analysis: - return metrics & QUICK_ANALYSIS_METRICS - return metrics - - -def get_metric_title(metric): - """Return a user-presentable title for a given metric key.""" - metric_title = METRICS[metric][METRIC_TITLE] - metric_unit = METRICS[metric][METRIC_UNIT] - if metric_unit: - metric_title += ' (' + metric_unit + ')' - return metric_title - - -def make_table(results, versions, experiment, quick_analysis=False): - """Produce a 2D array representing the results of the provided - experiment.""" - metrics = get_metrics_for_experiment(experiment, results, - quick_analysis=quick_analysis) - - # Make header rows. - table = [['Version', 'Run'] + [get_metric_title(metric) for metric in - sorted(metrics)]] - - # Make content rows. - try: - for version in versions: - data = results[version['id']][experiment['id']] - run_names = list(data) - try: - run_names.sort(key=int) - except ValueError: - run_names.sort() - for run_name in run_names: - table.append([version['name'], run_name] + - [data[run_name][metric] for metric in - sorted(metrics)]) - except ValueError: - print('ERROR: Data is not complete. Try removing results and ' - 're-running any experiments') - - return table - - -def print_table(table, transpose=False): - """Print a 2D list as a table. - - None values are printed as hyphens, use '' for blank cells. - """ - if transpose: - table = list(map(list, list(zip(*table)))) - if not table: - return - for row_no, _ in enumerate(table): - for col_no, _ in enumerate(table[0]): - cell = table[row_no][col_no] - if cell is None: - table[row_no][col_no] = [] - else: - table[row_no][col_no] = str(cell) - - col_widths = [] - for col_no, _ in enumerate(table[0]): - col_widths.append( - max(len(table[row_no][col_no]) for row_no, _ in enumerate(table))) - - for row_no, _ in enumerate(table): - for col_no, _ in enumerate(table[row_no]): - if col_no != 0: - sys.stdout.write(' ') - cell = table[row_no][col_no] - if isinstance(cell, list): - sys.stdout.write('-' * col_widths[col_no]) - else: - sys.stdout.write(cell + ' ' * (col_widths[col_no] - len(cell))) - sys.stdout.write('\n') - - -def plot_single(results, run_names, versions, metric, experiment, - axis, c_map): - """Create a bar chart comparing the results of all runs.""" - n_groups = len(versions) - n_bars = len(run_names) - ind = numpy.arange(n_groups) - spacing = 0.1 - width = (1. - spacing) / n_bars - colours = [c_map(x / (n_bars - 0.99)) for x in range(n_bars)] - - for bar_no, run_name in enumerate(run_names): - data = [results[version['id']][experiment['id']][run_name][metric] - for version in versions] - axis.bar(ind + (bar_no * width), data, width, label=run_name, - color=colours[bar_no]) - - axis.set_xticks(ind + ((width * n_bars) / 2.)) - axis.set_xticklabels([version['name'] for version in versions]) - axis.set_xlabel('Cylc Version') - axis.set_xlim([0, (1. * n_groups) - spacing]) - if len(run_names) > 1: - axis.legend(loc='upper left', prop={'size': 9}) - - -def plot_scale(results, run_names, versions, metric, experiment, - axis, c_map, lobf_order=2): - """Create a scatter plot with line of best fit interpreting float(run_name) - as the x-axis value.""" - x_data = [int(run_name) for run_name in run_names] - colours = [c_map(x / (len(versions) - 0.99)) - for x, _ in enumerate(versions)] - - for ver_no, version in enumerate(reversed(versions)): - y_data = [] - for run_name in run_names: - y_data.append( - results[version['id']][experiment['id']][run_name][metric] - ) - - # Plot data point. - if lobf_order >= 1: - axis.plot(x_data, y_data, 'x', color=colours[ver_no]) - else: - axis.plot(x_data, y_data, 'x', color=colours[ver_no], - label=version['name']) - - # Compute and plot line of best fit. - if lobf_order >= 1: - if lobf_order > 8: - print(('WARNING: Line of best fit order too high (' + - lobf_order + '). Order has been set to 3.')) - lobf_order = 3 - lobf = numpy.polyfit(x_data, y_data, lobf_order) - line = numpy.linspace(x_data[0], x_data[-1], 100) - points = numpy.poly1d(lobf)(line) - axis.plot(line, points, '-', color=colours[ver_no], - label=version['name']) - - # Plot settings. - axis.set_xlabel(experiment['config']['x-axis'] if 'x-axis' in - experiment['config'] else 'Tasks') - axis.legend(loc='upper left', prop={'size': 9}) - - -def plot_results(results, versions, experiment, plt_dir=None, - quick_analysis=False, lobf_order=2): - """Plot the results for the provided experiment. - - By default plots are - written out to plt_dir. If not plt_dir then the plots will be displayed - interactively. - - Args: - results (dict): The data contained in the profiling results file. - versions (list): List of version dictionaries for versions to plot. - experiment (dict): Experiment dict for the experiment to plot. - plt_dir (str): Directory to render any plots into. - quick_analysis (bool - optional): If True only a small set of metrics - will be plotted. - lobf_order (int - optional): The polynomial order for the line of best - fit, will be used for ALL plots. - - """ - # Are we able to plot? - if not CAN_PLOT: - print('\nWarning: Plotting requires numpy and maplotlib so cannot be ' - 'run.') - return - - versions = remove_profile_from_versions(versions) - - metrics = get_metrics_for_experiment(experiment, results, - quick_analysis=quick_analysis) - run_names = [run['name'] for run in experiment['config']['runs']] - plot_type = experiment['config']['analysis'] - - c_map = colour_map.Set1 - - # One plot per metric. - for metric in metrics: - # Set up plotting. - fig = plt.figure(111) - axis = fig.add_subplot(111) - - if plot_type == 'single': - plot_single(results, run_names, versions, metric, - experiment, axis, c_map) - elif plot_type == 'scale': - plot_scale(results, run_names, versions, metric, - experiment, axis, c_map, lobf_order=lobf_order) - - # Common config. - axis.grid(True) - axis.set_ylabel(get_metric_title(metric)) - - # Output graph. - if not plt_dir: - # Output directory not specified, use interactive mode. - plt.show() - else: - # Output directory specified, save figure as a pdf. - fig.savefig(os.path.join(plt_dir, - METRICS[metric][METRIC_FILENAME] + - '.pdf')) - - fig.clear() diff --git a/lib/cylc/profiling/git.py b/lib/cylc/profiling/git.py deleted file mode 100644 index 926cb64088f..00000000000 --- a/lib/cylc/profiling/git.py +++ /dev/null @@ -1,107 +0,0 @@ -# THIS FILE IS PART OF THE CYLC SUITE ENGINE. -# Copyright (C) 2008-2019 NIWA & British Crown (Met Office) & Contributors. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -"""Provides python wrappers to certain git commands.""" - -import os -from subprocess import Popen, PIPE, CalledProcessError, check_call - - -class GitCheckoutError(Exception): - """Exception to be raised if a git checkout command fails.""" - pass - - -def describe(ref=None): - """Returns stdout of the `git describe ` command.""" - try: - cmd = ['git', 'describe', '--tags', '--always'] - if ref: - cmd.append(ref) - return Popen( - cmd, stdin=open(os.devnull), stdout=PIPE - ).communicate()[0].decode().strip() - except CalledProcessError: - return None - - -def is_ancestor_commit(commit1, commit2): - """Returns True if commit1 is an ancestor of commit2.""" - try: - ancestor = Popen( - ['git', 'merge-base', commit1, commit2], - stdin=open(os.devnull), stdout=PIPE - ).communicate()[0].decode().strip() - return ancestor == commit1 - except CalledProcessError: - return False - - -def checkout(branch, delete_pyc=False): - """Checkouts the git branch with the provided name.""" - try: - cmd = ['git', 'checkout', '-q', branch] - print('$ ' + ' '.join(cmd)) - check_call(cmd, stdin=open(os.devnull)) - except CalledProcessError: - raise GitCheckoutError() - try: - if delete_pyc: - cmd = ['find', 'lib', '-name', r'\*.pyc', '-delete'] - print('$ ' + ' '.join(cmd)) - check_call( - cmd, stdin=open(os.devnull), stdout=open(os.devnull, 'wb')) - except CalledProcessError: - pass - - -def get_commit_date(commit): - """Returns the commit date (in unix time) of the provided commit.""" - try: - return int(Popen(['git', 'show', '-s', '--format=%at', commit], - stdout=PIPE, stderr=PIPE - ).communicate()[0].decode().split()[-1]) - except IndexError: - get_commit_date(commit.split('-')[0]) - - -def order_versions_by_date(versions): - """Orders a list of version objects by the date of the most recent - commit.""" - versions.sort(key=lambda x: get_commit_date(x['id'])) - - -def order_identifiers_by_date(versions): - """Orders a list of git identifiers by the date of the most recent - commit.""" - versions.sort(key=get_commit_date) - - -def has_changes_to_be_committed(): - """Returns True if there are any un-committed changes to the working - copy.""" - git_status = Popen( - ['git', 'status'], stdout=PIPE).communicate()[0].decode() - if 'Changes to be committed' in git_status: - return True - if 'Changed but not updated' in git_status: - return True - return False - - -def is_git_repo(): - """Returns true if we are currently within a git repository.""" - proc = Popen(['git', 'rev-parse', '--git-dir'], stdout=PIPE, stderr=PIPE,) - return proc.wait() == 0 diff --git a/lib/cylc/profiling/profile.py b/lib/cylc/profiling/profile.py deleted file mode 100644 index d42ce5396fe..00000000000 --- a/lib/cylc/profiling/profile.py +++ /dev/null @@ -1,349 +0,0 @@ -# THIS FILE IS PART OF THE CYLC SUITE ENGINE. -# Copyright (C) 2008-2019 NIWA & British Crown (Met Office) & Contributors. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -"""Performs profiling of cylc. - -System calls to cylc are performed here. - -""" - -import os -import shutil -from subprocess import Popen, PIPE, call -import sys -import tempfile -import time -import traceback - -from . import (PROFILE_MODE_TIME, PROFILE_MODE_CYLC, PROFILE_MODES, - PROFILE_FILES, SUITE_STARTUP_STRING) -from .analysis import extract_results -from .git import (checkout, describe, GitCheckoutError,) - - -def cylc_env(cylc_conf_path=''): - """Provide an environment for executing cylc commands in.""" - env = os.environ.copy() - env['CYLC_CONF_PATH'] = cylc_conf_path - return env - - -CLEAN_ENV = cylc_env() - - -class SuiteFailedException(Exception): - """Exception to handle the failure of a suite-run / validate command.""" - - MESSAGE = '''ERROR: "{cmd}" returned a non-zero code.' - stdout: {stdout} - stderr: {stderr}''' - - def __init__(self, cmd, stdout, stderr): - self.cmd = '$ ' + ' '.join(cmd) - self.stdout = stdout - self.stderr = stderr - Exception.__init__(self, str(self)) - - def __str__(self): - return self.MESSAGE.format(cmd=self.cmd, stdout=self.stdout, - stderr=self.stderr) - - -class ProfilingKilledException(SuiteFailedException): - """Exception to handle the event that a user has canceled profiling whilst - a suite is running.""" - pass - - -def cylc_major_version(): - """Return the first character of the cylc version e.g. '7'.""" - return Popen( - ['cylc', '--version'], env=CLEAN_ENV, stdin=open(os.devnull), - stdout=PIPE).communicate()[0].decode().strip()[0] - - -def register_suite(reg, sdir): - """Registers the suite located in sdir with the registration name reg.""" - cmd = ['cylc', 'register', reg, sdir] - print('$ ' + ' '.join(cmd)) - if not call(cmd, stdin=open(os.devnull), stdout=PIPE, env=CLEAN_ENV): - return True - print('\tFailed') - return False - - -def unregister_suite(reg): - """Unregisters the suite reg.""" - cmd = ['cylc', 'unregister', reg] - print('$ ' + ' '.join(cmd)) - call(cmd, stdin=open(os.devnull), stdout=PIPE, env=CLEAN_ENV) - - -def purge_suite(reg): - """Deletes the run directory for this suite.""" - print('$ rm -rf ' + os.path.expanduser(os.path.join('~', 'cylc-run', reg))) - try: - shutil.rmtree(os.path.expanduser(os.path.join('~', 'cylc-run', reg))) - except OSError: - return False - else: - return True - - -def run_suite(reg, options, out_file, profile_modes, mode='live', - conf_path=''): - """Runs cylc run / cylc validate on the provided suite with the requested - profiling options. - - Arguments: - reg (str): The registration of the suite to run. - options (list): List of jinja2 setting=value pairs. - out_file (str): The file to redirect stdout to. - profile_modes (list): List of profiling systems to employ - (i.e. cylc, time). - mode (str - optional): The mode to run the suite in, simulation, dummy, - live or validate. - - Returns: - str - The path to the suite stderr if any is present. - - """ - cmds = [] - env = cylc_env(cylc_conf_path=conf_path) - - # Cylc profiling, echo command start time. - if PROFILE_MODE_CYLC in profile_modes: - cmds += ['echo', SUITE_STARTUP_STRING, r'$(date +%s.%N)', '&&'] - - # /usr/bin/time profiling. - if PROFILE_MODE_TIME in profile_modes: - if sys.platform == 'darwin': # MacOS - cmds += ['/usr/bin/time', '-lp'] - else: # Assume Linux - cmds += ['/usr/bin/time', '-v'] - - # Run using `sh -c` to enable the redirection of output (darwins - # /usr/bin/time command does not have a -o option). - cmds += ['sh', '-c', "'"] - - # Cylc run. - run_cmds = [] - if mode == 'validate': - run_cmds = ['cylc', 'validate'] - elif mode == 'profile-simulation': - # In simulation mode task scripts are manually replaced with sleep 1. - run_cmds = ['cylc', 'run', '--mode', 'live'] - else: - run_cmds = ['cylc', 'run', '--mode', mode] - run_cmds += [reg] - cmds += run_cmds - - # Jinja2 params. - jinja2_params = ['-s {0}'.format(option) for option in options] - if mode == 'profile-simulation': - # Add namespaces jinja2 param (list of task names). - tmp = ['-s namespaces=root'] - namespaces = Popen( - ['cylc', 'list', reg] + jinja2_params + tmp, - stdin=open(os.devnull), stdout=PIPE, - env=env).communicate()[0].decode().split() + ['root'] - jinja2_params.append( - '-s namespaces={0}'.format(','.join(namespaces))) - cmds.extend(jinja2_params) - - # Cylc profiling. - if PROFILE_MODE_CYLC in profile_modes: - if mode == 'validate': - sys.exit('ERROR: profile_mode "cylc" not possible in validate ' - 'mode') - else: - cmds += ['--profile'] - - # No-detach mode. - if mode != 'validate': - cmds += ['--no-detach'] - - # Redirect output. - cmd_out = out_file + PROFILE_FILES['cmd-out'] - cmd_err = out_file + PROFILE_FILES['cmd-err'] - time_err = out_file + PROFILE_FILES['time-err'] - startup_file = out_file + PROFILE_FILES['startup'] - cmds += ['>', cmd_out, '2>', cmd_err] - if PROFILE_MODE_TIME in profile_modes: - cmds += ["'"] # Close shell. - - # Execute. - print('$ ' + ' '.join(cmds)) - try: - proc = Popen(' '.join(cmds), shell=True, stderr=open(time_err, 'w+'), - stdout=open(startup_file, 'w+'), env=env) - if proc.wait(): - raise SuiteFailedException(run_cmds, cmd_out, cmd_err) - except KeyboardInterrupt: - kill_cmd = ['cylc', 'stop', '--kill', reg] - print('$ ' + ' '.join(kill_cmd)) - call(kill_cmd, env=env, stdin=open(os.devnull)) - raise ProfilingKilledException(run_cmds, cmd_out, cmd_err) - - # Return cylc stderr if present. - try: - if os.path.getsize(cmd_err) > 0: - return cmd_err - except OSError: - pass - return None - - -def run_experiment(exp): - """Run the provided experiment with the currently checked-out cylc version. - - Return a dictionary of result files by run name. - - """ - profile_modes = [PROFILE_MODES[mode] for mode in exp['profile modes']] - cylc_maj_version = cylc_major_version() - result_files = {} - to_purge = [] - for run in exp['runs']: - results_for_run = [] - sdir = os.path.expanduser(run['suite dir']) - reg = 'profile-' + str(time.time()).replace('.', '') - count = 0 - while count < run['repeats'] + 1: - # Run suite. - out_file = tempfile.mkstemp()[1] - results_for_run.append(out_file) - register_suite(reg, sdir) - err_file = run_suite( - reg, - run['options'] + ['cylc_compat_mode=%s' % cylc_maj_version], - out_file, - profile_modes, - exp.get('mode', 'live'), - conf_path=run.get('globalrc', '')) - # Handle errors. - if err_file: - print(('WARNING: non-empty suite error log: ' - + err_file), file=sys.stderr) - # Tidy up. - if cylc_maj_version == '6': - unregister_suite(reg) - if not purge_suite(reg): - # Remove suite run dirs, if error then try again later. - to_purge.append(reg) - count += 1 - result_files[run['name']] = results_for_run - - if to_purge: - time.sleep(2) # Wait a bit before trying again to remove run dirs. - for reg in to_purge: - if purge_suite(reg): - to_purge.remove(reg) - - if to_purge: - print(('ERROR: The following suite(s) run ' - 'directories could not be deleted:\n' - '\t' + ' '.join(to_purge)), file=sys.stderr) - - return result_files - - -def delete_result_files(result_files): - """Deletes the temp files used to store experiment results.""" - for files in result_files.values(): - for file_ in files: - for suffix in PROFILE_FILES.values(): - try: - os.remove(file_ + suffix) - except OSError: - pass - - -def profile(schedule): - """Perform profiling for the provided schedule. - - Args: - schedule (dict): Dictionary of cylc version ids containing lists - of the experiments to run for each. - - Returns: - tuple - (results, checkout_count, success) - - results (dict) - A dictionary containing profiling results in the - form {version_id: experiment_id: metric: value}. - - checkout_count (int) - The number of times the git checkout command - has been executed. - - success (bool) - True if all experiments completed successfully, - else False. - """ - checkout_count = 0 - results = {} - success = True - for version_id, experiments in sorted(schedule.items()): - # Checkout cylc version. - if version_id != describe(): - try: - checkout(version_id, delete_pyc=True) - checkout_count += 1 - except GitCheckoutError: - sys.exit('Error: git checkout failed, were changes made to the' - ' working copy?') - - # Run Experiment. - for experiment in experiments: - try: - result_files = run_experiment(experiment['config']) - except ProfilingKilledException as exc: - # Profiling has been terminated, return what results we have. - print(exc) - return results, checkout_count, False - except SuiteFailedException as exc: - # Experiment failed to run, move onto the next one. - print(('Experiment "%s" failed at version "%s"' - '' % (experiment['name'], version_id)), file=sys.stderr) - print(exc, file=sys.stderr) - success = False - continue - else: - # Run analysis. - try: - processed_results = extract_results( - result_files, experiment['config']) - except Exception: - # Analysis failed, move onto the next experiment. - traceback.print_exc() - exp_files = [] - for run in result_files: - exp_files.extend(result_files[run]) - print(( - 'Analysis failed on results from experiment "%s" ' - 'running at version "%s".\n\tProfile files: %s' % ( - experiment['name'], - version_id, - ' '.join(exp_files))), file=sys.stderr) - if any(PROFILE_MODES[mode] == PROFILE_MODE_CYLC - for mode in experiment['config']['profile modes']): - print(( - 'Are you trying to use profile mode "cylc" ' - 'with an older version of cylc?'), file=sys.stderr) - success = False - continue - else: - if version_id not in results: - results[version_id] = {} - results[version_id][experiment['id']] = ( - processed_results) - delete_result_files(result_files) - - return results, checkout_count, success diff --git a/tests/profile-battery/00-compatability.t b/tests/profile-battery/00-compatability.t deleted file mode 100755 index ef0614b2ecd..00000000000 --- a/tests/profile-battery/00-compatability.t +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/bash -# THIS FILE IS PART OF THE CYLC SUITE ENGINE. -# Copyright (C) 2008-2019 NIWA & British Crown (Met Office) & Contributors. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -#------------------------------------------------------------------------------- -# Ensure that any changes to cylc haven't broken the profile-battery command -. $(dirname $0)/test_header -#------------------------------------------------------------------------------- -set_test_number 4 -#------------------------------------------------------------------------------- -# Check the format of `cylc version --long`. -run_ok "${TEST_NAME_BASE}-cylc-version" python3 -c " -import os -import sys -os.chdir('${CYLC_DIR}/lib') -from cylc.profiling import get_cylc_directory -if get_cylc_directory() != '${CYLC_DIR}': - sys.exit(1) -" -#------------------------------------------------------------------------------- -# Check for hello-world suite and that the cylc list command is still instated. -TEST_NAME="${TEST_NAME_BASE}-cylc-list-hello-world-suite" -run_ok "${TEST_NAME}" cylc list "${CYLC_DIR}/etc/dev-suites/hello-world" -cmp_ok "${TEST_NAME}.stdout" "${TEST_NAME}.stdout" "hello-world" -#------------------------------------------------------------------------------- -# Run the test experiment. -TEST_NAME="${TEST_NAME_BASE}-run-test-experiment" -RET_CODE=0 -cylc profile-battery -e 'test' -v 'HEAD' --test >'log' 2>'err' || RET_CODE=$? -if [[ ${RET_CODE} == 0 ]] -then - ok "${TEST_NAME}" -elif [[ ${RET_CODE} == 2 ]] -then - echo "Test requires git repository." >&2 - skip 1 -else - fail "${TEST_NAME}" - LOG_DIR="${TEST_LOG_DIR}/${TEST_NAME}" - mkdir -p "${LOG_DIR}" - mv 'log' 'err' "${LOG_DIR}" - # Move/rename profiling files so they will be cat'ed out by travis-ci. - while read; do - file_path="${REPLY}" - file_prefix=$(basename ${file_path}) - profile_dir=$(dirname ${file_path}) - profile_files=($(find "${profile_dir}" -type f -name "${file_prefix}*" \ - 2>/dev/null)) - for profile_file in ${profile_files[@]}; do - mv "${profile_file}" "${LOG_DIR}/$(basename ${profile_file})-err" - done - done < <(sed -n 's/Profile files:\(.*\)/\1/p' "${LOG_DIR}.stderr") - mv "${LOG_DIR}.log" "${LOG_DIR}.profile-battery-log-err" -fi -exit diff --git a/tests/profile-battery/test_header b/tests/profile-battery/test_header deleted file mode 120000 index 90bd5a36f92..00000000000 --- a/tests/profile-battery/test_header +++ /dev/null @@ -1 +0,0 @@ -../lib/bash/test_header \ No newline at end of file diff --git a/tests/validate/04-builtin-suites.t b/tests/validate/04-builtin-suites.t index b008ad434a6..57ba80893b4 100755 --- a/tests/validate/04-builtin-suites.t +++ b/tests/validate/04-builtin-suites.t @@ -15,7 +15,6 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . #------------------------------------------------------------------------------- -# Ensure that any changes to cylc haven't broken the profile-battery command . "$(dirname "$0")/test_header" #------------------------------------------------------------------------------- # Generate a list of suites.