From d78df6d4f597037f1942f9e8094ce9ac26f7557f Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 22 Jun 2023 17:01:40 -0600 Subject: [PATCH 01/61] Moved and updated `GAPS_SUPPLIED_ARGS` --- gaps/cli/command.py | 13 +----- gaps/cli/config.py | 12 ++++++ tests/cli/test_cli_config.py | 84 +++++++++++++++++++++++++++++++++--- 3 files changed, 90 insertions(+), 19 deletions(-) diff --git a/gaps/cli/command.py b/gaps/cli/command.py index bfde8db6..825c1e7e 100644 --- a/gaps/cli/command.py +++ b/gaps/cli/command.py @@ -9,22 +9,11 @@ import click +from gaps.cli.config import GAPS_SUPPLIED_ARGS from gaps.cli.documentation import CommandDocumentation from gaps.cli.preprocessing import split_project_points_into_ranges -GAPS_SUPPLIED_ARGS = { - "tag", - "command_name", - "config_file", - "project_dir", - "job_name", - "out_dir", - "out_fpath", - "config", -} - - class AbstractBaseCLICommandConfiguration(ABC): """Abstract Base CLI Command representation. diff --git a/gaps/cli/config.py b/gaps/cli/config.py index 8fbd79b1..2f556d06 100644 --- a/gaps/cli/config.py +++ b/gaps/cli/config.py @@ -35,6 +35,18 @@ ] TAG = "_j" MAX_AU_BEFORE_WARNING = 10_000 +GAPS_SUPPLIED_ARGS = { + "tag", + "command_name", + "config_file", + "project_dir", + "job_name", + "out_dir", + "out_fpath", + "config", + "log_directory", + "verbose", +} class _FromConfig: diff --git a/tests/cli/test_cli_config.py b/tests/cli/test_cli_config.py index fbb8b20f..81080f2d 100644 --- a/tests/cli/test_cli_config.py +++ b/tests/cli/test_cli_config.py @@ -363,13 +363,6 @@ def test_warning_about_au_usage( ): """Test the `run` function calls `_kickoff_hpc_job` for multiple nodes.""" - # def assert_message_was_logged(caplog): - # """Assert that a particular (partial) message was logged.""" - # caplog.clear() - - # def assert_message(msg, log_level=None, clear_records=False): - # """Assert that a message was logged.""" - # assert caplog.records tmp_path = test_ctx.obj["TMP_PATH"] if test_class: @@ -759,5 +752,82 @@ def test_func(project_points, input1, input2, tag): assert all(key not in status["run"]["test"] for key in exclude) +@pytest.mark.parametrize("test_extras", [False, True]) +@pytest.mark.parametrize("test_class", [False, True]) +def test_args_passed_to_pre_processor( + tmp_path, test_ctx, test_extras, test_class, runnable_script +): + """Test that correct arguments are passed to the pre-processor.""" + + input_config = { + "execution_control": {"max_workers": 100}, + "input1": 1, + "a_value": 5, + "a_multiplier": 2, + "input2": 7, + "_input2": 8, + "input3": None, + "project_points": [0, 1, 2], + } + config_fp = tmp_path / "config.json" + + if test_extras: + input_config["log_directory"] = str(tmp_path / "other_logs") + input_config["log_level"] = "DEBUG" + + with open(config_fp, "w") as config_file: + json.dump(input_config, config_file) + + def pre_processing( + config, + a_value, + a_multiplier, + command_name, + config_file, + project_dir, + job_name, + out_dir, + out_fpath, + log_directory, + verbose, + ): + assert a_value == 5 + assert a_multiplier == 2 + assert command_name == "run" + assert config_file == config_fp + assert project_dir == tmp_path + assert tmp_path.name in job_name + assert "run" in job_name + assert out_dir == tmp_path + assert out_fpath == out_dir / job_name + if test_extras: + assert log_directory == tmp_path / "other_logs" + assert verbose + else: + assert config == input_config + assert log_directory == tmp_path / "logs" + assert not verbose + + return config + + if test_class: + command_config = 
CLICommandFromClass( + TestCommand, + "run", + split_keys={"project_points", "input3"}, + config_preprocessor=pre_processing, + ) + else: + command_config = CLICommandFromFunction( + _testing_function, + name="run", + split_keys={"project_points", "input3"}, + config_preprocessor=pre_processing, + ) + + with pytest.warns(gapsWarning) as warning_info: + from_config(config_fp, command_config) + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) From cd4b5e40a9b35f55be49ff260e98c19106c73543 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 22 Jun 2023 17:04:14 -0600 Subject: [PATCH 02/61] Logging parameters now passed to preprocessor --- gaps/cli/config.py | 17 ++++++++++------- tests/cli/test_cli_command.py | 10 +++------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/gaps/cli/config.py b/gaps/cli/config.py index 2f556d06..d7ef3539 100644 --- a/gaps/cli/config.py +++ b/gaps/cli/config.py @@ -99,11 +99,11 @@ def enable_logging(self): self.log_directory = self.config.pop( "log_directory", (self.project_dir / "logs").as_posix() ) - self.verbose = ( - self.config.pop("log_level", "INFO") == "DEBUG" - or self.ctx.obj.get("VERBOSE", False) - ) - pipe_log_file = Path(self.log_directory) / f"{self.job_name}.log" + self.log_directory = Path(self.log_directory) + self.verbose = self.config.pop( + "log_level", "INFO" + ) == "DEBUG" or self.ctx.obj.get("VERBOSE", False) + pipe_log_file = self.log_directory / f"{self.job_name}.log" init_logger( stream=self.ctx.obj.get("LOG_STREAM", True), level="DEBUG" if self.verbose else "INFO", @@ -127,6 +127,9 @@ def preprocess_config(self): "project_dir": self.project_dir, "job_name": self.job_name, "out_dir": self.project_dir, + "out_fpath": self.project_dir / self.job_name, + "log_directory": self.log_directory, + "verbose": self.verbose, } extra_preprocessor_kwargs = { k: self.config[k] @@ -155,7 +158,7 @@ def set_exec_kwargs(self): self.exec_kwargs = { "option": "local", "sh_script": "", - "stdout_path": (Path(self.log_directory) / "stdout").as_posix(), + "stdout_path": (self.log_directory / "stdout").as_posix(), } self.exec_kwargs.update(self.config.get("execution_control", {})) @@ -177,7 +180,7 @@ def set_logging_options(self): """Assemble the logging options dictionary.""" self.logging_options = { "name": self.job_name, - "log_directory": self.log_directory, + "log_directory": self.log_directory.as_posix(), "verbose": self.verbose, "node": self.exec_kwargs.get("option", "local") != "local", } diff --git a/tests/cli/test_cli_command.py b/tests/cli/test_cli_command.py index 7b412ce2..3b0522e7 100644 --- a/tests/cli/test_cli_command.py +++ b/tests/cli/test_cli_command.py @@ -7,11 +7,8 @@ import click import pytest -from gaps.cli.command import ( - CLICommandFromFunction, - GAPS_SUPPLIED_ARGS, - _WrappedCommand, -) +from gaps.cli.command import CLICommandFromFunction, _WrappedCommand +from gaps.cli.config import GAPS_SUPPLIED_ARGS def test_cli_command_configuration(): @@ -29,8 +26,7 @@ def _test_func(): assert len(ccc.documentation.signatures) == 2 assert not ccc.is_split_spatially assert all( - param in ccc.documentation.skip_params - for param in GAPS_SUPPLIED_ARGS + param in ccc.documentation.skip_params for param in GAPS_SUPPLIED_ARGS ) def _test_preprocessor(config, name, _a_default=3): From 157410cfead709045618644eb61616617eea29b7 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 22 Jun 2023 17:04:28 -0600 Subject: [PATCH 03/61] Updated docstrings --- gaps/cli/command.py | 81 
++++++++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/gaps/cli/command.py b/gaps/cli/command.py index 825c1e7e..4d1417af 100644 --- a/gaps/cli/command.py +++ b/gaps/cli/command.py @@ -143,14 +143,13 @@ def __init__( convenience purposes. This argument combines the ``out_dir`` with ``job_name`` to yield a unique output filepath for a given node. Note that the - output filename will contain the tag if it is - "requested" from a node function, but *WILL NOT* - contain a tag if requested from a config - pre-processing function, since the number of split - nodes have not been determined at that point. - Also note that this string *WILL NOT* contain a - file-ending, so that will have to be added by the - node function. + output filename will contain the tag. Also note that + this string *WILL NOT* contain a file-ending, so + that will have to be added by the node function. + max_workers : int + Max number of workers to run per node. This option + will be requested from the user within the + "execution_control" dictionary in the config file. If your function is capable of multiprocessing, you should also include ``max_workers`` in the function signature. @@ -232,43 +231,40 @@ def __init__( can also "request" the following arguments by including them in the function signature: - tag : str - Short string unique to this job run that can be used - to generate unique output filenames, thereby - avoiding clashing output files with jobs on other - nodes. This string contains a leading underscore, - so the file name can easily be generated: - ``f"{out_file_name}{tag}.{extension}"``. command_name : str Name of the command being run. This is equivalent to the ``name`` input above. - config_file : str + config_file : Path Path to the configuration file specified by the user. - project_dir : str + project_dir : Path Path to the project directory (parent directory of the configuration file). job_name : str Name of the job being run. This is typically a combination of the project directory and the command name. - out_dir : str + out_dir : Path Path to output directory - typically equivalent to the project directory. - out_fpath : str + out_fpath : Path Suggested path to output file. You are not required to use this argument - it is provided purely for convenience purposes. This argument combines the ``out_dir`` with ``job_name`` to yield a unique output filepath for a given node. Note that the - output filename will contain the tag if it is - "requested" from a node function, but *WILL NOT* - contain a tag if requested from a config - pre-processing function, since the number of split - nodes have not been determined at that point. + output filename *WILL NOT* contain the tag, since + the number of split nodes have not been determined + when the config pre-processing function is called. Also note that this string *WILL NOT* contain a file-ending, so that will have to be added by the node function. + log_directory : Path + Path to log output directory (defaults to + project_dir / "logs"). + verbose : bool + Flag indicating wether the user has selected a DEBUG + verbosity level for logs. These inputs will be provided by GAPs and *will not* be displayed to users in the template configuration files or @@ -405,10 +401,10 @@ def __init__( command_name : str Name of the command being run. This is equivalent to the ``name`` input argument. - config_file : Path + config_file : str Path to the configuration file specified by the user. 
- project_dir : Path + project_dir : str Path to the project directory (parent directory of the configuration file). job_name : str @@ -418,9 +414,22 @@ def __init__( will not be included if you request this argument in a config preprocessing function, as the execution has not been split into multiple jobs by that point. - out_dir : Path + out_dir : str Path to output directory - typically equivalent to the project directory. + out_fpath : str + Suggested path to output file. You are not required + to use this argument - it is provided purely for + convenience purposes. This argument combines the + ``out_dir`` with ``job_name`` to yield a unique + output filepath for a given node. Note that the + output filename will contain the tag. Also note that + this string *WILL NOT* contain a file-ending, so + that will have to be added by the node function. + max_workers : int + Max number of workers to run per node. This option + will be requested from the user within the + "execution_control" dictionary in the config file. If your function is capable of multiprocessing, you should also include ``max_workers`` in the function signature. @@ -517,6 +526,24 @@ def __init__( out_dir : Path Path to output directory - typically equivalent to the project directory. + out_fpath : Path + Suggested path to output file. You are not required + to use this argument - it is provided purely for + convenience purposes. This argument combines the + ``out_dir`` with ``job_name`` to yield a unique + output filepath for a given node. Note that the + output filename *WILL NOT* contain the tag, since + the number of split nodes have not been determined + when the config pre-processing function is called. + Also note that this string *WILL NOT* contain a + file-ending, so that will have to be added by the + node function. + log_directory : Path + Path to log output directory (defaults to + project_dir / "logs"). + verbose : bool + Flag indicating wether the user has selected a DEBUG + verbosity level for logs. 
These inputs will be provided by GAPs and *will not* be displayed to users in the template configuration files or From b75e0c0408ac9100e0f5380fb5ad7b9887b9b707 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 22 Jun 2023 17:25:03 -0600 Subject: [PATCH 04/61] Added info logging --- gaps/cli/config.py | 14 +++++++++++++- tests/cli/test_cli_config.py | 12 +++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/gaps/cli/config.py b/gaps/cli/config.py index d7ef3539..437926ec 100644 --- a/gaps/cli/config.py +++ b/gaps/cli/config.py @@ -113,7 +113,7 @@ def enable_logging(self): def validate_config(self): """Validate the user input config file.""" - logger.debug("Validating %r", self.config_file) + logger.debug("Validating %r", str(self.config_file)) _validate_config(self.config, self.command_config.documentation) return self @@ -205,6 +205,17 @@ def prepare_context(self): self.ctx.obj["OUT_DIR"] = self.project_dir return self + def log_job_info(self): + """Log information about job submission""" + logger.info( + "Running %s from config file: %r", + self.command_name, + str(self.config_file), + ) + logger.info("Target output directory: %r", str(self.project_dir)) + logger.info("Target logging directory: %r", str(self.log_directory)) + return self + def kickoff_jobs(self): """Kickoff jobs across nodes based on config and run function.""" keys_to_run, lists_to_run = self._keys_and_lists_to_run() @@ -315,6 +326,7 @@ def run(self): .set_logging_options() .set_exclude_from_status() .prepare_context() + .log_job_info() .kickoff_jobs() ) diff --git a/tests/cli/test_cli_config.py b/tests/cli/test_cli_config.py index 81080f2d..0b363b31 100644 --- a/tests/cli/test_cli_config.py +++ b/tests/cli/test_cli_config.py @@ -202,7 +202,7 @@ def runnable_script(): ) @pytest.mark.parametrize("test_class", [False, True]) def test_run_local( - test_ctx, extra_input, none_list, runnable_script, test_class + test_ctx, extra_input, none_list, runnable_script, test_class, caplog ): """Test the `run` function locally.""" @@ -248,6 +248,15 @@ def pre_processing(config, a_value, a_multiplier): assert "input2" in warning_info[0].message.args[0] assert "_input2" in warning_info[0].message.args[0] + expected_log_starts = [ + "Running run from config file: '", + "Target output directory: '", + "Target logging directory: '", + ] + for expected in expected_log_starts: + assert any(expected in record.message for record in caplog.records) + assert not any("Path(" in record.message for record in caplog.records) + if "max_workers" in extra_input: expected_message = ( "Found key 'max_workers' outside of 'execution_control'. 
" @@ -778,6 +787,7 @@ def test_args_passed_to_pre_processor( with open(config_fp, "w") as config_file: json.dump(input_config, config_file) + # pylint: disable=too-many-arguments def pre_processing( config, a_value, From 7fdc24b7e4066938be4f3ab62f04d1e6ec62a078 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 22 Jun 2023 17:28:18 -0600 Subject: [PATCH 05/61] Log command name --- gaps/cli/execution.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gaps/cli/execution.py b/gaps/cli/execution.py index 537ddee0..fa33ad46 100644 --- a/gaps/cli/execution.py +++ b/gaps/cli/execution.py @@ -143,7 +143,8 @@ def _kickoff_hpc_job(ctx, cmd, **kwargs): return name = ctx.obj["NAME"] - logger.info("Running on HPC with job name %r.", name) + command = ctx.obj["COMMAND_NAME"] + logger.info("Running %s on HPC with job name %r.", command, name) logger.debug("Submitting the following command:\n%s", cmd) out = ctx.obj["MANAGER"].submit(name, cmd=cmd, **kwargs)[0] id_msg = f" (Job ID #{out})" if out else "" From e4a15a61aa27931df73a85e11833c8ae480e1f76 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 22 Jun 2023 17:39:08 -0600 Subject: [PATCH 06/61] More thorough test for `_should_run` function --- gaps/cli/execution.py | 5 ++- tests/cli/test_cli_execution.py | 73 ++++++++++++++++++--------------- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/gaps/cli/execution.py b/gaps/cli/execution.py index fa33ad46..d89ebd22 100644 --- a/gaps/cli/execution.py +++ b/gaps/cli/execution.py @@ -176,7 +176,10 @@ def _should_run(ctx): job_name=name, subprocess_manager=ctx.obj.get("MANAGER"), ) - if status == StatusOption.SUCCESSFUL: + if status == StatusOption.NOT_SUBMITTED: + return True + + if status in {StatusOption.SUCCESSFUL, StatusOption.COMPLETE}: msg = ( f"Job {name!r} is successful in status json found in {out_dir!r}, " f"not re-running." 
diff --git a/tests/cli/test_cli_execution.py b/tests/cli/test_cli_execution.py index 2cdaf104..aad4be76 100644 --- a/tests/cli/test_cli_execution.py +++ b/tests/cli/test_cli_execution.py @@ -31,40 +31,45 @@ def test_should_run(test_ctx, caplog, assert_message_was_logged): assert _should_run(test_ctx) assert not caplog.records - Status.make_single_job_file( - test_ctx.obj["OUT_DIR"], - test_ctx.obj["COMMAND_NAME"], - test_ctx.obj["NAME"], - {StatusField.JOB_STATUS: StatusOption.FAILED}, - ) - - assert _should_run(test_ctx) - assert not caplog.records - - Status.make_single_job_file( - test_ctx.obj["OUT_DIR"], - test_ctx.obj["COMMAND_NAME"], - test_ctx.obj["NAME"], - {StatusField.JOB_STATUS: StatusOption.RUNNING}, - ) - - assert not _should_run(test_ctx) - assert_message_was_logged(test_ctx.obj["NAME"], "INFO") - assert_message_was_logged("was found with status", "INFO") - assert_message_was_logged("running", "INFO", clear_records=True) - assert not caplog.records - - Status.make_single_job_file( - test_ctx.obj["OUT_DIR"], - test_ctx.obj["COMMAND_NAME"], - test_ctx.obj["NAME"], - {StatusField.JOB_STATUS: StatusOption.SUCCESSFUL}, - ) - - assert not _should_run(test_ctx) - assert_message_was_logged(test_ctx.obj["NAME"], "INFO") - assert_message_was_logged("is successful", "INFO") - assert_message_was_logged("not re-running", "INFO") + for option in [StatusOption.NOT_SUBMITTED, StatusOption.FAILED]: + Status.make_single_job_file( + test_ctx.obj["OUT_DIR"], + test_ctx.obj["COMMAND_NAME"], + test_ctx.obj["NAME"], + {StatusField.JOB_STATUS: option}, + ) + + assert _should_run(test_ctx) + assert not caplog.records + + for option in [StatusOption.SUBMITTED, StatusOption.RUNNING]: + Status.make_single_job_file( + test_ctx.obj["OUT_DIR"], + test_ctx.obj["COMMAND_NAME"], + test_ctx.obj["NAME"], + {StatusField.JOB_STATUS: option}, + ) + + assert not _should_run(test_ctx) + assert_message_was_logged(test_ctx.obj["NAME"], "INFO") + assert_message_was_logged( + "was found with status", "INFO", clear_records=True + ) + assert not caplog.records + + for option in [StatusOption.SUCCESSFUL, StatusOption.COMPLETE]: + Status.make_single_job_file( + test_ctx.obj["OUT_DIR"], + test_ctx.obj["COMMAND_NAME"], + test_ctx.obj["NAME"], + {StatusField.JOB_STATUS: option}, + ) + + assert not _should_run(test_ctx) + assert_message_was_logged(test_ctx.obj["NAME"], "INFO") + assert_message_was_logged("is successful", "INFO") + assert_message_was_logged("not re-running", "INFO", clear_records=True) + assert not caplog.records def test_kickoff_job_local_basic(test_ctx, assert_message_was_logged): From 0928a4a04e573ddd08d80b0a05d7bbfd13cea4c1 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 22 Jun 2023 18:05:34 -0600 Subject: [PATCH 07/61] Add missing command name in log --- gaps/cli/execution.py | 5 +++-- tests/cli/test_cli_execution.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/gaps/cli/execution.py b/gaps/cli/execution.py index d89ebd22..4ad71b5c 100644 --- a/gaps/cli/execution.py +++ b/gaps/cli/execution.py @@ -114,7 +114,8 @@ def _kickoff_local_job(ctx, cmd): return name = ctx.obj["NAME"] - logger.info("Running locally with job name %r.", name) + command = ctx.obj["COMMAND_NAME"] + logger.info("Running %r locally with job name %r.", command, name) logger.debug("Submitting the following command:\n%s", cmd) Status.mark_job_as_submitted( ctx.obj["OUT_DIR"], @@ -144,7 +145,7 @@ def _kickoff_hpc_job(ctx, cmd, **kwargs): name = ctx.obj["NAME"] command = ctx.obj["COMMAND_NAME"] - 
logger.info("Running %s on HPC with job name %r.", command, name) + logger.info("Running %r on HPC with job name %r.", command, name) logger.debug("Submitting the following command:\n%s", cmd) out = ctx.obj["MANAGER"].submit(name, cmd=cmd, **kwargs)[0] id_msg = f" (Job ID #{out})" if out else "" diff --git a/tests/cli/test_cli_execution.py b/tests/cli/test_cli_execution.py index aad4be76..06393709 100644 --- a/tests/cli/test_cli_execution.py +++ b/tests/cli/test_cli_execution.py @@ -101,7 +101,7 @@ def test_kickoff_job_local(test_ctx, assert_message_was_logged): exec_kwargs = {"option": "local"} cmd = "python -c \"import warnings; warnings.warn('a test warning')\"" kickoff_job(test_ctx, cmd, exec_kwargs) - assert_message_was_logged("Running locally with job name", "INFO") + assert_message_was_logged("Running 'run' locally with job name", "INFO") assert_message_was_logged(test_ctx.obj["NAME"], "INFO") assert_message_was_logged(cmd, "DEBUG") assert_message_was_logged("Subprocess received stderr") @@ -160,7 +160,7 @@ def _test_submit(cmd): "Found extra keys in 'execution_control'! ", "WARNING" ) assert_message_was_logged("dne_arg", "WARNING") - assert_message_was_logged("Running on HPC with job name", "INFO") + assert_message_was_logged("Running 'run' on HPC with job name", "INFO") assert_message_was_logged(test_ctx.obj["NAME"], "INFO") assert_message_was_logged("Kicked off job") assert_message_was_logged("(Job ID #9999)", clear_records=True) From df31e7ea264c71f6573b8e295f89157230950397 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 10 Jul 2023 14:57:00 -0600 Subject: [PATCH 08/61] Added logging to status updates --- gaps/status.py | 17 ++++++++++++++++- tests/test_status.py | 9 +++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/gaps/status.py b/gaps/status.py index 36d9aa17..2dfa8860 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -1,9 +1,10 @@ # -*- coding: utf-8 -*- """gaps Job status manager. """ import json -import logging import time import shutil +import pprint +import logging import datetime as dt from pathlib import Path from copy import deepcopy @@ -647,6 +648,11 @@ def __init__(self, directory, command, job_name, job_attrs): self.out_file = None def __enter__(self): + logger.debug( + "Received job attributes: %s", + pprint.pformat(self.job_attrs, indent=4), + ) + self.start_time = dt.datetime.now() self.job_attrs.update( { @@ -680,10 +686,19 @@ def __exit__(self, exc_type, exc, traceback): StatusField.JOB_STATUS: StatusOption.SUCCESSFUL, } ) + logger.info( + "Command %r complete. Time elapsed: %s. 
" + "Target output file: %r", + self.command, + time_elapsed, + self.out_file, + ) else: self.job_attrs.update( {StatusField.JOB_STATUS: StatusOption.FAILED} ) + logger.info("Command %r failed in %s", self.command, time_elapsed) + Status.make_single_job_file( self.directory, command=self.command, diff --git a/tests/test_status.py b/tests/test_status.py index 847cac3d..244f3309 100644 --- a/tests/test_status.py +++ b/tests/test_status.py @@ -567,7 +567,7 @@ def test_parse_command_status(tmp_path): assert not Status.parse_command_status(tmp_path, "generation", key="dne") -def test_status_updates(tmp_path): +def test_status_updates(tmp_path, assert_message_was_logged): """Test `StatusUpdates` context manager""" assert not list(tmp_path.glob("*")) @@ -591,6 +591,9 @@ def test_status_updates(tmp_path): stu.out_file = "my_test_file.h5" + assert_message_was_logged("Command 'generation' complete.", "INFO") + assert_message_was_logged("Target output file: 'my_test_file.h5'", "INFO") + job_files = list(tmp_path.glob("*")) assert len(job_files) == 1 with open(job_files[0]) as job_status: @@ -605,7 +608,7 @@ def test_status_updates(tmp_path): assert test_attrs == TEST_1_ATTRS_1 -def test_status_for_failed_job(tmp_path): +def test_status_for_failed_job(tmp_path, assert_message_was_logged): """Test `StatusUpdates` context manager for a failed job""" class _TestError(ValueError): @@ -629,6 +632,8 @@ class _TestError(ValueError): except _TestError: pass + assert_message_was_logged("Command 'generation' failed in", "INFO") + status = Status(tmp_path).update_from_all_job_files() status = status["generation"]["test0"] From cd2854e15fe374184d6bb0e7aa2a89dbbfc9f1a6 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 10 Jul 2023 15:13:10 -0600 Subject: [PATCH 09/61] Updated order of logging statements --- gaps/cli/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gaps/cli/config.py b/gaps/cli/config.py index 437926ec..2ea7ebbc 100644 --- a/gaps/cli/config.py +++ b/gaps/cli/config.py @@ -321,12 +321,12 @@ def run(self): return ( self.enable_logging() .validate_config() + .log_job_info() .preprocess_config() .set_exec_kwargs() .set_logging_options() .set_exclude_from_status() .prepare_context() - .log_job_info() .kickoff_jobs() ) From 386b999009a26d3dc2e1d50ae79538759de26762 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 11 Jul 2023 11:28:25 -0600 Subject: [PATCH 10/61] Added to QOS documentation --- gaps/cli/documentation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 0c4c333a..6d21b9a6 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -210,7 +210,10 @@ tp use as well as the base AU cost. :allocation: (str) HPC project (allocation) handle. :walltime: (int) Node walltime request in hours. - :qos: (str) Quality-of-service specifier. By default, ``"normal"``. + :qos: (str) Quality-of-service specifier. On Eagle or Kestrel, + this should be one of {{'standby', 'normal', 'high'}}. + Note that 'high' priority doubles the AU cost. By default, + ``"normal"``. :memory: (int, optional) Node memory request in GB. Default is not to specify.{n}{mw} :queue: (str, optional; PBS ONLY) HPC queue to submit job to. 
From 74cc127c4d8967835da5abf802fe70e5006bd66d Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Jul 2023 18:28:36 -0600 Subject: [PATCH 11/61] Minor docstring updates --- gaps/cli/preprocessing.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/gaps/cli/preprocessing.py b/gaps/cli/preprocessing.py index 1ba2cd94..dc5c4901 100644 --- a/gaps/cli/preprocessing.py +++ b/gaps/cli/preprocessing.py @@ -71,13 +71,13 @@ def preprocess_collect_config( out filepath input from the user. command_name : str Name of the command being run. This is used to parse the - pipeline status for output files if "collect_pattern"="PIPELINE" - in the input `config`. + pipeline status for output files if + ``collect_pattern="PIPELINE"`` in the input `config`. collect_pattern : str | list | dict, optional - Unix-style /filepath/pattern*.h5 representing the files to be - collected into a single output HDF5 file. If no output file path - is specified (i.e. this input is a single pattern or a list of - patterns), the output file path will be inferred from the + Unix-style ``/filepath/pattern*.h5`` representing the files to + be collected into a single output HDF5 file. If no output file + path is specified (i.e. this input is a single pattern or a list + of patterns), the output file path will be inferred from the pattern itself (specifically, the wildcard will be removed and the result will be the output file path). If a list of patterns is provided, each pattern will be collected into a @@ -86,10 +86,10 @@ def preprocess_collect_config( output file (including the filename itself; relative paths are allowed) and the values are patterns representing the files that should be collected into the output file. If running a collect - job as part of a pipeline, this input can be set to "PIPELINE", - which will parse the output of the previous step and generate - the input file pattern and output file name automatically. - By default, ``"PIPELINE"``. + job as part of a pipeline, this input can be set to + ``"PIPELINE"``, which will parse the output of the previous step + and generate the input file pattern and output file name + automatically. By default, ``"PIPELINE"``. Returns ------- From 7eae807f49a2f732c4c2122624a5fefc212c5a15 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Fri, 21 Jul 2023 10:48:41 -0600 Subject: [PATCH 12/61] Update docstrings --- gaps/cli/documentation.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 6d21b9a6..bce85410 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -214,22 +214,24 @@ this should be one of {{'standby', 'normal', 'high'}}. Note that 'high' priority doubles the AU cost. By default, ``"normal"``. - :memory: (int, optional) Node memory request in GB. Default is not to - specify.{n}{mw} + :memory: (int, optional) Node memory request in GB. By default, + ``None``, which does not specify a memory limit.{n}{mw} :queue: (str, optional; PBS ONLY) HPC queue to submit job to. Examples include: 'debug', 'short', 'batch', 'batch-h', - 'long', etc. By default, `None`, which uses `test_queue`. + 'long', etc. By default, ``None``, which uses "test_queue". :feature: (str, optional) Additional flags for SLURM job - (e.g. "-p debug"). Default is not to specify. + (e.g. "-p debug"). By default, ``None``, which does not + specify any additional flags. :conda_env: (str, optional) Name of conda environment to activate. 
- Default is not to load any environments. - :module: (str, optional) Module to load. Default is not to load any - modules. + By default, ``None``, which does not load any environments. + :module: (str, optional) Module to load. By default, ``None``, which + does not load any modules. :sh_script: (str, optional) Extra shell script to run before - command call. Default is not to run any scripts. + command call. By default, ``None``, which does not run any + scripts. - Only the "option" input is required for local execution. For - execution on the HPC, the allocation and walltime are also + Only the `option` key is required for local execution. For + execution on the HPC, the `allocation` and `walltime` keys are also required. All other options are populated with default values, as seen above. log_directory : str @@ -247,8 +249,7 @@ "\n :nodes: (int, optional) Number of nodes to split the project " "\n points across. Note that the total number of requested " "\n nodes for a job may be larger than this value if the " - "\n command splits across other inputs (e.g. analysis " - "\n years) Default is 1." + "\n command splits across other inputs. Default is ``1``." ) MW_DOC = "\n :max_workers: ({type}) {desc}" From 5fbe55b9000cc0cc34701c767e22ee333337b247 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Fri, 21 Jul 2023 13:59:09 -0600 Subject: [PATCH 13/61] Allow extra args to be added to exec control --- gaps/cli/config.py | 44 +++--- gaps/cli/documentation.py | 141 +++++++++++------ gaps/cli/execution.py | 3 +- tests/cli/test_cli_config.py | 13 +- tests/cli/test_cli_documentation.py | 224 +++++++++++++++++++++------- 5 files changed, 309 insertions(+), 116 deletions(-) diff --git a/gaps/cli/config.py b/gaps/cli/config.py index 2ea7ebbc..d3b7f33c 100644 --- a/gaps/cli/config.py +++ b/gaps/cli/config.py @@ -19,6 +19,7 @@ from gaps.config import load_config from gaps.log import init_logger from gaps.cli.execution import kickoff_job +from gaps.cli.documentation import EXTRA_EXEC_PARAMS from gaps.exceptions import gapsKeyError from gaps.warnings import gapsWarning @@ -162,17 +163,21 @@ def set_exec_kwargs(self): } self.exec_kwargs.update(self.config.get("execution_control", {})) - if "max_workers" in self.config: - max_workers = self.config.pop("max_workers") + extra_params = set() + for extra_exec_param in EXTRA_EXEC_PARAMS: + if extra_exec_param in self.config: + extra_params.add(extra_exec_param) + param = self.config.pop(extra_exec_param) + self.exec_kwargs[extra_exec_param] = param + + if extra_params: msg = ( - "Found key 'max_workers' outside of 'execution_control'. " - f"Moving 'max_workers' value ({max_workers}) into " - "'execution_control' block. To silence this warning, " - "please specify 'max_workers' inside of the " - "'execution_control' block." + f"Found key(s) {extra_params} outside of 'execution_control'. " + "Moving these keys into 'execution_control' block. To " + "silence this warning, please specify these keys inside of " + "the 'execution_control' block." 
) warn(msg, gapsWarning) - self.exec_kwargs["max_workers"] = max_workers return self @@ -224,7 +229,10 @@ def kickoff_jobs(self): num_jobs_submit = len(jobs) self._warn_about_excessive_au_usage(num_jobs_submit) n_zfill = len(str(num_jobs_submit)) - max_workers_per_node = self.exec_kwargs.pop("max_workers", None) + extra_exec_args = {} + for param in EXTRA_EXEC_PARAMS: + if param in self.exec_kwargs: + extra_exec_args[param] = self.exec_kwargs.pop(param) for node_index, values in enumerate(jobs): if num_jobs_submit > 1: tag = f"{TAG}{str(node_index).zfill(n_zfill)}" @@ -242,12 +250,12 @@ def kickoff_jobs(self): "job_name": job_name, "out_dir": self.project_dir.as_posix(), "out_fpath": (self.project_dir / job_name).as_posix(), - "max_workers": max_workers_per_node, "run_method": getattr( self.command_config, "run_method", None ), } ) + node_specific_config.update(extra_exec_args) for key, val in zip(keys_to_run, values): if isinstance(key, str): @@ -349,8 +357,9 @@ def _ensure_required_args_exist(config, documentation): name for name in documentation.required_args if name not in config } - if documentation.max_workers_required: - missing = _mark_max_workers_missing_if_needed(config, missing) + missing = _mark_extra_exec_params_missing_if_needed( + documentation, config, missing + ) if any(missing): msg = ( @@ -360,12 +369,13 @@ def _ensure_required_args_exist(config, documentation): raise gapsKeyError(msg) -def _mark_max_workers_missing_if_needed(config, missing): - """Add max_workers as missing key if it is not in `execution_control.""" - +def _mark_extra_exec_params_missing_if_needed(documentation, config, missing): + """Add extra exec params as missing if not found in `execution_control.""" exec_control = config.get("execution_control", {}) - if "max_workers" not in config and "max_workers" not in exec_control: - missing |= {"max_workers"} + for param in EXTRA_EXEC_PARAMS: + if documentation.param_required(param): + if param not in config and param not in exec_control: + missing |= {param} return missing diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index bce85410..8ce8aa8a 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -3,6 +3,7 @@ CLI documentation utilities. """ from copy import deepcopy +from functools import lru_cache from inspect import signature, isclass from numpydoc.docscrape import NumpyDocString @@ -24,6 +25,24 @@ "sh_script": None, } +EXTRA_EXEC_PARAMS = { + "max_workers": "Maximum number of parallel workers run on each node.", + "sites_per_worker": "Number of sites to run in series on a worker.", + "mem_util_lim": ( + "Memory utilization limit (fractional). Must be a value between 0 " + "and 100. This input sets how much data will be stored in-memory " + "at any given time before flushing to disk." + ), + "timeout": ( + "Number of seconds to wait for parallel run iteration to complete " + "before early termination" + ), + "pool_size": ( + "Number of futures to submit to a single process pool for " + "parallel futures." + ), +} + CONFIG_TYPES = [ ConfigType.JSON, ConfigType.YAML, @@ -215,7 +234,7 @@ Note that 'high' priority doubles the AU cost. By default, ``"normal"``. :memory: (int, optional) Node memory request in GB. By default, - ``None``, which does not specify a memory limit.{n}{mw} + ``None``, which does not specify a memory limit.{n}{eep} :queue: (str, optional; PBS ONLY) HPC queue to submit job to. Examples include: 'debug', 'short', 'batch', 'batch-h', 'long', etc. By default, ``None``, which uses "test_queue". 
@@ -251,7 +270,7 @@ "\n nodes for a job may be larger than this value if the " "\n command splits across other inputs. Default is ``1``." ) -MW_DOC = "\n :max_workers: ({type}) {desc}" +EXTRA_EXEC_PARAM_DOC = "\n :{name}: ({type}) {desc}" class CommandDocumentation: @@ -294,7 +313,7 @@ def __init__(self, *functions, skip_params=None, is_split_spatially=False): p.name: p for doc in self.docs for p in doc["Parameters"] } self.skip_params = set() if skip_params is None else set(skip_params) - self.skip_params |= {"cls", "self", "max_workers"} + self.skip_params |= {"cls", "self"} | set(EXTRA_EXEC_PARAMS) self.is_split_spatially = is_split_spatially @property @@ -303,64 +322,61 @@ def default_exec_values(self): exec_vals = deepcopy(DEFAULT_EXEC_VALUES) if not self.is_split_spatially: exec_vals.pop("nodes", None) - if self.max_workers_in_func_signature: - exec_vals["max_workers"] = ( - self.REQUIRED_TAG - if self.max_workers_required - else self.max_workers_param.default - ) + for param in EXTRA_EXEC_PARAMS: + if self._param_in_func_signature(param): + exec_vals[param] = ( + self.REQUIRED_TAG + if self.param_required(param) + else self._param_value(param).default + ) return exec_vals @property def exec_control_doc(self): """str: Execution_control documentation.""" nodes_doc = NODES_DOC if self.is_split_spatially else "" - return EXEC_CONTROL_DOC.format(n=nodes_doc, mw=self._max_workers_doc) + return EXEC_CONTROL_DOC.format( + n=nodes_doc, eep=self._extra_exec_param_doc + ) @property - def _max_workers_doc(self): - """str: `MW_DOC` formatted with the info from the input func.""" - param = self.param_docs.get("max_workers") + def _extra_exec_param_doc(self): + """str: Docstring formatted with the info from the input func.""" + return "".join( + [ + self._format_extra_exec_param_doc(param_name) + for param_name in EXTRA_EXEC_PARAMS + ] + ) + + def _format_extra_exec_param_doc(self, param_name): + """Format extra exec control parameters""" + param = self.param_docs.get(param_name) try: - return MW_DOC.format(type=param.type, desc=" ".join(param.desc)) + return EXTRA_EXEC_PARAM_DOC.format( + name=param_name, type=param.type, desc=" ".join(param.desc) + ) except AttributeError: pass - if self.max_workers_in_func_signature: - return MW_DOC.format( - type=( - "(int)" if self.max_workers_required else "(int, optional)" - ), - desc=( - "Maximum number of parallel workers run on each node." - "Default is `None`, which uses all available cores." - ), + if self._param_in_func_signature(param_name): + if self.param_required(param_name): + param_type = "int" + default_text = "" + else: + default_val = self._param_value(param_name).default + if default_val is None: + param_type = "int, optional" + else: + param_type = f"{type(default_val).__name__}, optional" + default_text = f"By default, ``{default_val}``." 
+ return EXTRA_EXEC_PARAM_DOC.format( + name=param_name, + type=param_type, + desc=" ".join([EXTRA_EXEC_PARAMS[param_name], default_text]), ) - return "" - @property - def max_workers_in_func_signature(self): - """bool: `True` if "max_workers" is a param of the input function.""" - return self.max_workers_param is not None - - @property - def max_workers_param(self): - """bool: `True` if "max_workers" is a required parameter of `func`.""" - for sig in self.signatures: - for name in sig.parameters: - if name == "max_workers": - return sig.parameters["max_workers"] - return None - - @property - def max_workers_required(self): - """bool: `True` if "max_workers" is a required parameter of `func`.""" - param = self.max_workers_param - if param is None: - return False - return param.default is param.empty - @property def required_args(self): """set: Required parameters of the input function.""" @@ -462,6 +478,39 @@ def command_help(self, command_name): name=command_name, desc=self.extended_summary ) + @lru_cache(maxsize=16) + def _param_value(self, param): + """Extract parameter if it exists in signature""" + for sig in self.signatures: + for name in sig.parameters: + if name == param: + return sig.parameters[param] + return None + + @lru_cache(maxsize=16) + def _param_in_func_signature(self, param): + """`True` if `param` is a param of the input function.""" + return self._param_value(param) is not None + + @lru_cache(maxsize=16) + def param_required(self, param): + """Check wether a parameter is a required input for the run function. + + Parameters + ---------- + param : str + Name of parameter to check. + + Returns + ------- + bool + ``True`` if `param` is a required parameter of `func`. + """ + param = self._param_value(param) + if param is None: + return False + return param.default is param.empty + def _pipeline_command_help(pipeline_config): """Generate pipeline command help from a sample config.""" diff --git a/gaps/cli/execution.py b/gaps/cli/execution.py index 4ad71b5c..e3e53c6c 100644 --- a/gaps/cli/execution.py +++ b/gaps/cli/execution.py @@ -74,7 +74,8 @@ def _filter_exec_kwargs(kwargs, func, hardware_option): if extra_keys: msg = ( f"Found extra keys in 'execution_control'! The following " - f"inputs will be ignored: {extra_keys}" + f"inputs will be ignored: {extra_keys}. To silence this warning, " + "please remove the extra keys from the 'execution_control' block." ) warn(msg, gapsWarning) diff --git a/tests/cli/test_cli_config.py b/tests/cli/test_cli_config.py index 0b363b31..c2ba406a 100644 --- a/tests/cli/test_cli_config.py +++ b/tests/cli/test_cli_config.py @@ -42,6 +42,7 @@ def _testing_function( out_dir, out_fpath, max_workers, + pool_size=16, _input2=None, _z_0=None, ): @@ -71,6 +72,8 @@ def _testing_function( Internal GAPs out filepath. max_workers : int Max workers. + pool_size : int, optional + Pool size. By default, ``16``. _input2 : float, optional Secret input 2. By default, ``None``. _z_0 : str, optional @@ -85,6 +88,7 @@ def _testing_function( "_input2": _input2, "input3": input3, "max_workers": max_workers, + "pool_size": pool_size, "_z_0": _z_0, "out_fpath": out_fpath, "out_dir": out_dir, @@ -130,6 +134,7 @@ def run( out_dir, out_fpath, max_workers, + pool_size=16, _z_0=None, ): """Test run function for CLI around. @@ -154,6 +159,8 @@ def run( Internal GAPs out filepath. max_workers : int Max workers. + pool_size : int, optional + Pool size. By default, ``16``. _z_0 : str, optional Secret str. By default, ``None``. 
""" @@ -166,6 +173,7 @@ def run( "_input2": self._input2, "input3": self._input3, "max_workers": max_workers, + "pool_size": pool_size, "_z_0": _z_0, "out_fpath": out_fpath, "out_dir": out_dir, @@ -259,8 +267,8 @@ def pre_processing(config, a_value, a_multiplier): if "max_workers" in extra_input: expected_message = ( - "Found key 'max_workers' outside of 'execution_control'. " - "Moving 'max_workers' value (100) into 'execution_control' block." + "Found key(s) {'max_workers'} outside of 'execution_control'. " + "Moving these keys into 'execution_control' block." ) assert expected_message in warning_info[1].message.args[0] @@ -278,6 +286,7 @@ def pre_processing(config, a_value, a_multiplier): assert outputs["_input2"] == 10 assert outputs["input3"] is None assert outputs["max_workers"] == 100 + assert outputs["pool_size"] == 16 status = Status(tmp_path).update_from_all_job_files() assert len(status["run"]) == 1 diff --git a/tests/cli/test_cli_documentation.py b/tests/cli/test_cli_documentation.py index 09588114..16f7dc11 100644 --- a/tests/cli/test_cli_documentation.py +++ b/tests/cli/test_cli_documentation.py @@ -9,7 +9,11 @@ import pytest -from gaps.cli.documentation import DEFAULT_EXEC_VALUES, CommandDocumentation +from gaps.cli.documentation import ( + DEFAULT_EXEC_VALUES, + EXTRA_EXEC_PARAMS, + CommandDocumentation, +) def func_no_args(): @@ -26,87 +30,207 @@ def test_command_documentation_copies_skip_params(): skip_params_set = ["a"] doc = CommandDocumentation(func_no_args, skip_params=skip_params_set) assert skip_params_set == ["a"] - assert doc.skip_params == {"a", "cls", "self", "max_workers"} + assert doc.skip_params == {"a", "cls", "self"} | set(EXTRA_EXEC_PARAMS) -def test_command_documentation_max_workers(): - """Test the `CommandDocumentation` with and without `max_workers`.""" +def test_command_documentation_extra_exec_params(): + """Test the `CommandDocumentation` with extra exec params.""" - def func(max_workers): + def func(max_workers, sites_per_worker, mem_util_lim, timeout, pool_size): """A short description. Parameters ---------- max_workers : int Number of workers to run. + sites_per_worker : float + Number of sites to run. + mem_util_lim : str + A test documentation. + timeout : dict + A timeout value. + pool_size : list + A worker pool size. """ + expected_parameters = [ + "max_workers", + "sites_per_worker", + "mem_util_lim", + "timeout", + "pool_size", + ] + expected_types = ["(int)", "(float)", "(str)", "(dict)", "(list)"] + expected_decs = [ + "Number of workers to run.", + "Number of sites to run.", + "A test documentation.", + "A timeout value.", + "A worker pool size.", + ] + expected_iter = zip(expected_parameters, expected_types, expected_decs) + doc = CommandDocumentation(func) - assert doc.max_workers_in_func_signature - assert doc.max_workers_required - assert "Number of workers to run." 
in doc._max_workers_doc - assert "(int)" in doc._max_workers_doc - assert "max_workers" in doc.exec_control_doc + for param, p_type, p_doc in expected_iter: + assert doc._param_in_func_signature(param) + assert doc.param_required(param) + assert p_doc in doc._format_extra_exec_param_doc(param) + assert p_type in doc._format_extra_exec_param_doc(param) + assert param in doc.exec_control_doc - execution_control = doc.template_config["execution_control"] - assert execution_control["max_workers"] == doc.REQUIRED_TAG - assert doc.default_exec_values["max_workers"] == doc.REQUIRED_TAG + execution_control = doc.template_config["execution_control"] + assert execution_control[param] == doc.REQUIRED_TAG + assert doc.default_exec_values[param] == doc.REQUIRED_TAG - def func(max_workers): - """A short description.""" - doc = CommandDocumentation(func) - assert doc.max_workers_in_func_signature - assert doc.max_workers_required - assert doc._max_workers_doc - assert "(int)" in doc._max_workers_doc - assert "max_workers" in doc.exec_control_doc +def test_command_documentation_extra_exec_params_no_user_doc(): + """Test the `CommandDocumentation` with extra exec params no user doc.""" - execution_control = doc.template_config["execution_control"] - assert execution_control["max_workers"] == doc.REQUIRED_TAG - assert doc.default_exec_values["max_workers"] == doc.REQUIRED_TAG + def func(max_workers, sites_per_worker, mem_util_lim, timeout, pool_size): + """A short description.""" - def func(max_workers=2): + expected_parameters = [ + "max_workers", + "sites_per_worker", + "mem_util_lim", + "timeout", + "pool_size", + ] + doc = CommandDocumentation(func) + for param in expected_parameters: + assert doc._param_in_func_signature(param) + assert doc.param_required(param) + assert doc._format_extra_exec_param_doc(param) + assert "(int)" in doc._format_extra_exec_param_doc(param) + assert param in doc.exec_control_doc + + execution_control = doc.template_config["execution_control"] + assert execution_control["max_workers"] == doc.REQUIRED_TAG + assert doc.default_exec_values["max_workers"] == doc.REQUIRED_TAG + + +def test_command_documentation_extra_exec_params_user_defaults(): + """Test the `CommandDocumentation` with extra exec params and defaults.""" + + def func( + max_workers=2, + sites_per_worker=0.4, + mem_util_lim="test", + timeout=None, + pool_size=None, + ): """A short description. Parameters ---------- max_workers : int, optional - Number of workers to run. By default, `2`. + Number of workers to run. By default, ``2``. + sites_per_worker : float, optional + Number of sites to run. By default, ``0.4``. + mem_util_lim : str, optional + A test documentation. By default, ``"test"``. + timeout : dict, optional + A timeout value. By default, ``None``. + pool_size : list, optional + A worker pool size. By default, ``None``. 
""" + expected_parameters = [ + "max_workers", + "sites_per_worker", + "mem_util_lim", + "timeout", + "pool_size", + ] + expected_types = [ + "(int, optional)", + "(float, optional)", + "(str, optional)", + "(dict, optional)", + "(list, optional)", + ] + expected_decs = [ + "Number of workers to run.", + "Number of sites to run.", + "A test documentation.", + "A timeout value.", + "A worker pool size.", + ] + expected_value = [2, 0.4, "test", None, None] + expected_iter = zip( + expected_parameters, expected_types, expected_decs, expected_value + ) + doc = CommandDocumentation(func) - assert doc.max_workers_in_func_signature - assert not doc.max_workers_required - assert "Number of workers to run." in doc._max_workers_doc - assert "(int, optional)" in doc._max_workers_doc - assert "max_workers" in doc.exec_control_doc + for param, p_type, p_doc, p_val in expected_iter: + assert doc._param_in_func_signature(param) + assert not doc.param_required(param) + assert p_doc in doc._format_extra_exec_param_doc(param) + assert p_type in doc._format_extra_exec_param_doc(param) + assert param in doc.exec_control_doc + + execution_control = doc.template_config["execution_control"] + assert execution_control[param] == p_val + assert doc.default_exec_values[param] == p_val + + +def test_command_documentation_extra_exec_params_defaults_no_docs(): + """Test documentation with extra exec params, defaults, no doc.""" + + def func( + max_workers=2, + sites_per_worker=0.4, + mem_util_lim="test", + timeout=None, + pool_size=None, + ): + """A short description.""" - execution_control = doc.template_config["execution_control"] - assert execution_control["max_workers"] == 2 - assert doc.default_exec_values["max_workers"] == 2 + expected_parameters = [ + "max_workers", + "sites_per_worker", + "mem_util_lim", + "timeout", + "pool_size", + ] + expected_types = [ + "(int, optional)", + "(float, optional)", + "(str, optional)", + "(int, optional)", + "(int, optional)", + ] - def func(max_workers=None): - """A short description.""" + expected_value = [2, 0.4, "test", None, None] + expected_iter = zip(expected_parameters, expected_types, expected_value) doc = CommandDocumentation(func) - assert doc.max_workers_in_func_signature - assert not doc.max_workers_required - assert doc._max_workers_doc - assert "(int, optional)" in doc._max_workers_doc - assert "max_workers" in doc.exec_control_doc + for param, p_type, p_val in expected_iter: + assert doc._param_in_func_signature(param) + assert not doc.param_required(param) + assert doc._extra_exec_param_doc + assert p_type in doc._format_extra_exec_param_doc(param) + assert f"``{p_val}``" in doc._format_extra_exec_param_doc(param) + assert param in doc.exec_control_doc + + execution_control = doc.template_config["execution_control"] + assert execution_control[param] == p_val + assert doc.default_exec_values[param] == p_val + - execution_control = doc.template_config["execution_control"] - assert execution_control["max_workers"] is None - assert doc.default_exec_values["max_workers"] is None +def test_command_documentation_no_extra_exec_params(): + """Test documentation with no extra exec params""" doc = CommandDocumentation(func_no_args) - assert not doc.max_workers_in_func_signature - assert not doc.max_workers_required - assert not doc._max_workers_doc - assert "max_workers" not in doc.template_config["execution_control"] - assert "max_workers" not in doc.default_exec_values - assert "max_workers" not in doc.exec_control_doc + for param in EXTRA_EXEC_PARAMS: + assert not 
doc._param_in_func_signature(param) + assert not doc.param_required(param) + assert not doc._format_extra_exec_param_doc(param) + assert param not in doc.template_config["execution_control"] + assert param not in doc.default_exec_values + assert param not in doc.exec_control_doc + + assert not doc._extra_exec_param_doc def test_command_documentation_default_exec_values_and_doc(): From a252d449c73f01d9da156ea2853d43eeb7d89de1 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 22 Jul 2023 14:16:30 -0600 Subject: [PATCH 14/61] Add main CLI help docs --- gaps/cli/cli.py | 22 +++++++----- gaps/cli/documentation.py | 70 ++++++++++++++++++++++++++++++++++++--- tests/cli/test_cli.py | 9 +++++ 3 files changed, 88 insertions(+), 13 deletions(-) diff --git a/gaps/cli/cli.py b/gaps/cli/cli.py index b9508137..087e30ef 100644 --- a/gaps/cli/cli.py +++ b/gaps/cli/cli.py @@ -16,6 +16,7 @@ CLICommandFromFunction, _WrappedCommand, ) +from gaps.cli.documentation import _main_command_help from gaps.cli.preprocessing import preprocess_collect_config from gaps.cli.status import status_command @@ -56,9 +57,8 @@ def convert_to_commands(self): command = as_click_command(command_config) self.commands.append(command) Pipeline.COMMANDS[command_config.name] = command - self.template_configs[command_config.name] = ( - command_config.documentation.template_config - ) + template_config = command_config.documentation.template_config + self.template_configs[command_config.name] = template_config return self def add_pipeline_command(self): @@ -181,6 +181,7 @@ def make_cli(commands, info=None): """ info = info or {} command_generator = _CLICommandGenerator(commands) + commands = command_generator.generate() options = [ click.Option( @@ -189,12 +190,17 @@ def make_cli(commands, info=None): help="Flag to turn on debug logging. Default is not verbose.", ), ] - prog_name = [info["name"]] if "name" in info else [] + + prog_name = info.get("name") + if prog_name: + main_help = _main_command_help( + prog_name, command_generator.command_configs + ) + else: + main_help = "Command Line Interface" + main = click.Group( - help=" ".join(prog_name + ["Command Line Interface"]), - params=options, - callback=_main_cb, - commands=command_generator.generate(), + help=main_help, params=options, callback=_main_cb, commands=commands ) version = info.get("version") if version is not None: diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 8ce8aa8a..6ca69c50 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -49,9 +49,60 @@ ConfigType.TOML, ] +MAIN_DOC = """{name} Command Line Interface. + +Typically, a good place to start is to set up a {name} job with a pipeline +config that points to several {name} modules that you want to run in serial. + +To begin, you can generate some template configuration files using:: + + $ {name} template-configs + +By default, this generates template JSON configuration files, though you +can request YAML or TOML configuration files instead. YOu can run +``$ {name} template-configs --help`` on the command line to see all available +options for the ``template-configs`` command. 
Once the template configuration +files have been generated, you can fill them out by referring to the +module CLI documentation (if available) or the help pages of the module CLIs +for more details on the config options for each CLI command:: + + {cli_help_str} + +After appropriately filling our the configuration files for each module you +want to run, you can call the {name} pipeline CLI using:: + + $ {name} pipeline -c config_pipeline.json + +This command will run each pipeline step in sequence. + +.. Note:: You will need to re-submit the ``pipeline`` command above after +each completed pipeline step. + +To check the status of the pipeline, you can run:: + + $ {name} status + +This will print a report to the command line detailing the progress of the +current pipeline. See ``$ {name} status --help`` for all status command +options. + +If you need to parameterize the pipeline execution, you can use the ``batch`` +command. For details on setting up a batch config file, see the documentation +or run:: + + ${name} batch --help + +on the command line. Once you set up a batch config file, you can execute +it using:: + + $ {name} status -c config_batch.json + +The general structure of the {name} CLI is given below. +""" + PIPELINE_CONFIG_DOC = """ -Path to the "pipeline" configuration file. This argument can be left out, -but *one and only one file* with the name "pipeline" should exist in the +Path to the ``pipeline`` configuration file. This argument can be left out, +but *one and only one file* with "pipeline" in the name should exist in the directory and contain the config information. Below is a sample template config .. tabs:: @@ -87,7 +138,7 @@ """ BATCH_CONFIG_DOC = """ -Path to the "batch" configuration file. Below is a sample template config +Path to the ``batch`` configuration file. Below is a sample template config .. tabs:: @@ -229,8 +280,8 @@ tp use as well as the base AU cost. :allocation: (str) HPC project (allocation) handle. :walltime: (int) Node walltime request in hours. - :qos: (str) Quality-of-service specifier. On Eagle or Kestrel, - this should be one of {{'standby', 'normal', 'high'}}. + :qos: (str, optional) Quality-of-service specifier. On Eagle or + Kestrel, this should be one of {{'standby', 'normal', 'high'}}. Note that 'high' priority doubles the AU cost. By default, ``"normal"``. :memory: (int, optional) Node memory request in GB. 
By default, @@ -512,6 +563,15 @@ def param_required(self, param): return param.default is param.empty +def _main_command_help(prog_name, commands): + """Generate main command help from commands input.""" + cli_help_str = "\n\n ".join( + [f"$ {prog_name} --help"] + + [f"$ {prog_name} {command.name} --help" for command in commands] + ) + return MAIN_DOC.format(name=prog_name, cli_help_str=cli_help_str) + + def _pipeline_command_help(pipeline_config): """Generate pipeline command help from a sample config.""" format_inputs = {} diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index c5fbc053..f06b4a03 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -91,6 +91,13 @@ def test_make_cli(): ]: assert expected_command in main.commands + assert "test" in main.help + assert "$ test --help" in main.help + assert "$ test run --help" in main.help + assert "$ test analyze --help" in main.help + assert "$ test collect-analyze --help" in main.help + assert "$ test collect-run --help" not in main.help + @pytest.mark.integration def test_cli( @@ -124,6 +131,8 @@ def preprocess_run_config(config, project_dir, out_dir): main = make_cli(commands) + assert main.help == "Command Line Interface" + assert not set(tmp_cwd.glob("*")) cli_runner.invoke(main, ["template-configs"]) files = set(tmp_cwd.glob("*")) From 90ea6d8697137a7ec8cc8f828a0fd746376ddf2d Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 22 Jul 2023 14:24:35 -0600 Subject: [PATCH 15/61] Minor docstring updates --- gaps/cli/documentation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 6ca69c50..776440f7 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -59,7 +59,7 @@ $ {name} template-configs By default, this generates template JSON configuration files, though you -can request YAML or TOML configuration files instead. YOu can run +can request YAML or TOML configuration files instead. You can run ``$ {name} template-configs --help`` on the command line to see all available options for the ``template-configs`` command. Once the template configuration files have been generated, you can fill them out by referring to the @@ -267,6 +267,9 @@ {desc} +The general structure for calling this CLI command is given below (add +``--help`` to print help info to the terminal). + """ EXEC_CONTROL_DOC = """ Parameters From e8673c85c042b3d3b8009304fb2083173bd299da Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 22 Jul 2023 14:28:45 -0600 Subject: [PATCH 16/61] More typo fixes --- gaps/cli/documentation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 776440f7..91baf52b 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -76,7 +76,7 @@ This command will run each pipeline step in sequence. .. Note:: You will need to re-submit the ``pipeline`` command above after -each completed pipeline step. + each completed pipeline step. To check the status of the pipeline, you can run:: @@ -90,7 +90,7 @@ command. For details on setting up a batch config file, see the documentation or run:: - ${name} batch --help + $ {name} batch --help on the command line. 
Once you set up a batch config file, you can execute it using:: From 03f7807ba09d25efe48b271807cd457a41ebe596 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 22 Jul 2023 14:57:49 -0600 Subject: [PATCH 17/61] Add extra command help documentation --- gaps/cli/batch.py | 6 +++++- gaps/cli/pipeline.py | 6 +++++- gaps/cli/status.py | 3 +++ gaps/cli/templates.py | 11 ++++++++--- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/gaps/cli/batch.py b/gaps/cli/batch.py index 3853f840..b287dbaf 100644 --- a/gaps/cli/batch.py +++ b/gaps/cli/batch.py @@ -64,7 +64,11 @@ def batch_command(): context_settings=None, callback=_batch, params=params, - help="Execute an analysis pipeline over a parametric set of inputs", + help=( + "Execute an analysis pipeline over a parametric set of inputs.\n\n" + "The general structure for calling this CLI command is given " + "below (add``--help`` to print help info to the terminal)." + ), epilog=None, short_help=None, options_metavar="[OPTIONS]", diff --git a/gaps/cli/pipeline.py b/gaps/cli/pipeline.py index 5288d8d1..ef1dfae7 100644 --- a/gaps/cli/pipeline.py +++ b/gaps/cli/pipeline.py @@ -127,7 +127,11 @@ def pipeline_command(template_config): context_settings=None, callback=pipeline, params=params, - help="""Execute multiple steps in an analysis pipeline""", + help=( + "Execute multiple steps in an analysis pipeline.\n\n" + "The general structure for calling this CLI command is given " + "below (add``--help`` to print help info to the terminal)." + ), epilog=None, short_help=None, options_metavar="[OPTIONS]", diff --git a/gaps/cli/status.py b/gaps/cli/status.py index 9a4e42c7..1f7f8eb0 100644 --- a/gaps/cli/status.py +++ b/gaps/cli/status.py @@ -48,6 +48,9 @@ Display the status of a project FOLDER. By default, the status of the current working directory is displayed. + +The general structure for calling this CLI command is given below +(add``--help`` to print help info to the terminal)." """ diff --git a/gaps/cli/templates.py b/gaps/cli/templates.py index a1893c46..52c241a5 100644 --- a/gaps/cli/templates.py +++ b/gaps/cli/templates.py @@ -16,6 +16,7 @@ logger = logging.getLogger(__name__) + # pylint: disable=redefined-builtin @click.pass_context def _make_template_config(ctx, commands, type, configs): @@ -97,9 +98,13 @@ def template_command(template_configs): configs=template_configs, ), params=params, - help="Generate template config files for requested COMMANDS. If no " - "COMMANDS are given, config files for the entire pipeline are " - "generated.", + help=( + "Generate template config files for requested COMMANDS. If no " + "COMMANDS are given, config files for the entire pipeline are " + "generated.\n\nThe general structure for calling this CLI " + "command is given below (add``--help`` to print help info to " + "the terminal)." + ), epilog=None, short_help=None, options_metavar="", From 1a9c6364251b0725a68450079f21c95103884694 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 22 Jul 2023 18:43:23 -0600 Subject: [PATCH 18/61] Added minor text to doc --- gaps/cli/cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gaps/cli/cli.py b/gaps/cli/cli.py index 087e30ef..0911ef57 100644 --- a/gaps/cli/cli.py +++ b/gaps/cli/cli.py @@ -168,6 +168,8 @@ def make_cli(commands, info=None): name : str Name of program to display at the top of the CLI help. + This input is optional, but specifying it yields richer + documentation for the main CLI command. version : str Include program version with a ``--version`` CLI option. 
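
To make the ``info`` input above concrete, here is a minimal sketch of how a
developer might assemble a CLI with a program name and version. This is only
an illustration of the pattern described in this patch: the ``run_model``
function, its ``resource_file`` parameter, and the ``my-model`` program name
are hypothetical placeholders, not part of ``gaps``::

    from gaps.cli.cli import make_cli
    from gaps.cli.command import CLICommandFromFunction


    def run_model(project_points, resource_file, max_workers=1, tag=None):
        """Run a (hypothetical) model and return the output file path."""
        # `resource_file` and `project_points` would come from the user
        # config; `tag` is supplied by gaps so the output name is unique.
        out_fpath = f"./model_output{tag}.h5"
        # ... model execution for the given project_points goes here ...
        return out_fpath


    commands = [
        CLICommandFromFunction(
            run_model,
            name="run",
            split_keys={"project_points"},
        )
    ]

    # "name" enables the richer main-command help described above;
    # "version" adds a --version option to the generated CLI.
    cli = make_cli(commands, info={"name": "my-model", "version": "0.1.0"})

    if __name__ == "__main__":
        cli()
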
From 87323f95972611db23c5438446eefe70f758df08 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 22 Jul 2023 18:44:10 -0600 Subject: [PATCH 19/61] Documentation updates --- gaps/cli/command.py | 54 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/gaps/cli/command.py b/gaps/cli/command.py index 4d1417af..c339700f 100644 --- a/gaps/cli/command.py +++ b/gaps/cli/command.py @@ -101,6 +101,13 @@ def __init__( configuration files, so it should be thoroughly documented (using a `NumPy Style Python Docstring `_). + In particular, the "Extended Summary" and the "Parameters" + section will be pulled form the docstring. + + .. WARNING:: The "Extended Summary" section may not show up + properly if the short "Summary" section is + missing. + This function must return the path to the output file it generates (or a list of paths if multiple output files are generated). If no output files are generated, the function @@ -146,18 +153,24 @@ def __init__( output filename will contain the tag. Also note that this string *WILL NOT* contain a file-ending, so that will have to be added by the node function. - max_workers : int - Max number of workers to run per node. This option - will be requested from the user within the - "execution_control" dictionary in the config file. If your function is capable of multiprocessing, you should also include ``max_workers`` in the function signature. ``gaps`` will pass an integer equal to the number of processes the user wants to run on a single node for this - value. Note that the ``config`` parameter is not allowed as + value. + + .. WARNING:: The keywords ``{"max-workers", + "sites_per_worker", "mem_util_lim", "timeout", + "pool_size"}`` are assumed to describe + execution control. If you request any of these + as function arguments, users of your CLI will + specify them in the `execution_control` block + of the input config file. + + Note that the ``config`` parameter is not allowed as a function signature item. Please request all the required - keys/inputs directly. This function can also request + keys/inputs directly instead. This function can also request "private" arguments by including a leading underscore in the argument name. These arguments are NOT exposed to users in the documentation or template configuration files. Instead, @@ -370,7 +383,7 @@ def __init__( Parameters ---------- - init : class initializer + init : class The class to be initialized and used to to run on individual nodes. The class must implement ``method``. The initializer, along with the corresponding method, will be used to @@ -378,6 +391,15 @@ def __init__( files, so it should be thoroughly documented (using a `NumPy Style Python Docstring `_). + In particular, the "Extended Summary" (from the ``__init__`` + method *only*) and the "Parameters" section (from *both* the + ``__init__`` method and the run `method` given below) will + be pulled form the docstring. + + .. WARNING:: The "Extended Summary" section may not display + properly if the short "Summary" section in the + ``__init__`` method is missing. + method : str The name of a method of the ``init`` class to act as the model function to run across multiple nodes on the HPC. This @@ -426,18 +448,24 @@ def __init__( output filename will contain the tag. Also note that this string *WILL NOT* contain a file-ending, so that will have to be added by the node function. - max_workers : int - Max number of workers to run per node. 
This option - will be requested from the user within the - "execution_control" dictionary in the config file. If your function is capable of multiprocessing, you should also include ``max_workers`` in the function signature. ``gaps`` will pass an integer equal to the number of processes the user wants to run on a single node for this - value. Note that the ``config`` parameter is not allowed as + value. + + .. WARNING:: The keywords ``{"max-workers", + "sites_per_worker", "mem_util_lim", "timeout", + "pool_size"}`` are assumed to describe + execution control. If you request any of these + as function arguments, users of your CLI will + specify them in the `execution_control` block + of the input config file. + + Note that the ``config`` parameter is not allowed as a function signature item. Please request all the required - keys/inputs directly. This function can also request + keys/inputs directly instead. This function can also request "private" arguments by including a leading underscore in the argument name. These arguments are NOT exposed to users in the documentation or template configuration files. Instead, From 6975ec1a676c29abeba418046c620f02dcb60167 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 1 Aug 2023 13:21:22 -0600 Subject: [PATCH 20/61] Update README with more info about target audience --- README.rst | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index e407e470..9b9af39e 100644 --- a/README.rst +++ b/README.rst @@ -24,21 +24,37 @@ Welcome to Geospatial Analysis Pipelines (GAPs)! .. inclusion-intro Geospatial Analysis Pipelines (GAPs) is a framework designed -to assist users in scaling their geospatial models to a -High-Performance Computing (HPC) environment. In particular, -GAPs automatically distributes the execution of a -single-location model (such as the `System Advisor Model `_) -over a large geospatial extent (e.g. CONUS) across many parallel -HPC nodes. Born from the open-source `reV `_ model, GAPs is a +to assist researchers and software developers add execution +tools to their geospatial python models. Born from the +open-source `reV `_ model, GAPs is a robust and easy-to-use engine that provides a rich set of features -such as configuration file generation, job status monitoring, -CLI Documentation, and more. - +such as command-line interface (CLI) generation and documentation, +basic High-Performance Computing (HPC) scaling capabilities, +configuration file generation, job status monitoring, and more. + + +Who should use GAPs +=================== +GAPs is intended to be used by researchers and/or software developers +who have implemented a working python model but have not yet added any +external model execution tools. Within minimal effort, developers can +use GAPs to add a variety of utility for end-users, including a complete +set of CLI commands and documentation pulled from the model run function +docstrings. In addition, GAPs provides basic HPC execution capabilities, +particularly catered towards embarrassingly parallel geospatial models +(e.g. single-location models such as the `System Advisor Model `_). +GAPs can automatically distribute the execution of such models over a large +geospatial extent (e.g. CONUS) across many parallel HPC nodes. + +GAPs is **NOT** a workflow management system (WMS), and therefore does not +provide any of the in-depth tools/capabilities expected from a proper WMS. 
+However, GAPs-supported models can sometimes be included as part of the workflow in +WMS tools like `Torc `_. To get started, take a look at the `documentation `_ (examples coming soon!) -Installing gaps +Installing GAPs =============== NOTE: The installation instruction below assume that you have python installed From 69784236fe41bf3e5abbb34c7293605abc783e15 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 1 Aug 2023 18:12:12 -0600 Subject: [PATCH 21/61] Add explanation about `tag` to docs --- gaps/cli/command.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/gaps/cli/command.py b/gaps/cli/command.py index c339700f..675349a5 100644 --- a/gaps/cli/command.py +++ b/gaps/cli/command.py @@ -203,7 +203,13 @@ def __init__( function *must* return the path (as a string) to the output file it generates in order for users to be able to use ``"PIPELINE"`` as the input to the ``collect_pattern`` key - in the collection config. By default, ``False``. + in the collection config. The path returned by this function + must also include the ``tag`` in the output file name in + order for collection to function properly (an easy way to do + this is to request ``tag`` in the function signature and + name the output file generated by the function using the + ``f"{out_file_name}{tag}.{extension}"`` format). + By default, ``False``. split_keys : set | container, optional A set of strings identifying the names of the config keys that ``gaps`` should split the function execution on. To @@ -497,7 +503,13 @@ def __init__( function *must* return the path (as a string) to the output file it generates in order for users to be able to use ``"PIPELINE"`` as the input to the ``collect_pattern`` key - in the collection config. By default, ``False``. + in the collection config. The path returned by this function + must also include the ``tag`` in the output file name in + order for collection to function properly (an easy way to do + this is to request ``tag`` in the function signature and + name the output file generated by the function using the + ``f"{out_file_name}{tag}.{extension}"`` format). + By default, ``False``. split_keys : set | container, optional A set of strings identifying the names of the config keys that ``gaps`` should split the function execution on. To From b7bee0e519975c80656ba3dd34a7c2fd2c8b674d Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 1 Aug 2023 18:14:42 -0600 Subject: [PATCH 22/61] Added extra tests for pipeline specification --- gaps/pipeline.py | 37 ++++++++++++++++---- tests/cli/test_cli_pipeline.py | 16 ++++++--- tests/cli/test_cli_preprocesing.py | 3 ++ tests/test_pipeline.py | 56 ++++++++++++++++++++++++------ 4 files changed, 90 insertions(+), 22 deletions(-) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index 26e900b8..fb31ab37 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -49,13 +49,7 @@ def __init__(self, pipeline, monitor=True): self._out_dir = self._out_dir.as_posix() config = load_config(pipeline) - - if "pipeline" not in config: - raise gapsConfigError( - 'Could not find required key "pipeline" in the ' - "pipeline config." 
- ) - self._run_list = config["pipeline"] + self._run_list = _check_pipeline(config) self._init_status() init_logging_from_config(config) @@ -235,6 +229,35 @@ def run(cls, pipeline, monitor=True): cls(pipeline, monitor=monitor)._main() +def _check_pipeline(config): + """Check pipeline steps input.""" + + if "pipeline" not in config: + raise gapsConfigError( + "Could not find required key " + '"pipeline" ' + "in the pipeline config." + ) + + pipeline = config["pipeline"] + + if not isinstance(pipeline, list): + raise gapsConfigError( + 'Config arg "pipeline" must be a list of ' + f"{{command: f_config}} pairs, but received {type(pipeline)}." + ) + + for step_dict in pipeline: + for f_config in step_dict.values(): + if not Path(f_config).expanduser().resolve().exists(): + raise gapsConfigError( + "Pipeline step depends on non-existent " + f"file: {f_config}" + ) + + return pipeline + + def _parse_code_array(arr): """Parse array of return codes to get single return code for command.""" diff --git a/tests/cli/test_cli_pipeline.py b/tests/cli/test_cli_pipeline.py index a74df1bd..027142c3 100644 --- a/tests/cli/test_cli_pipeline.py +++ b/tests/cli/test_cli_pipeline.py @@ -30,6 +30,7 @@ {"run": "./config_run.json"}, ], } +SUCCESS_CONFIG = {"test": "success"} @pytest.fixture @@ -47,7 +48,9 @@ def runnable_pipeline(): def run(config): """Test command.""" config_fp = Path(config) - config_fp.touch() + with open(config_fp, "w") as config_file: + json.dump(SUCCESS_CONFIG, config_file) + attrs = {StatusField.JOB_STATUS: StatusOption.SUCCESSFUL} Status.make_single_job_file(config_fp.parent, "run", "test", attrs) @@ -85,11 +88,12 @@ def test_pipeline_command( """Test the pipeline_command creation.""" target_config_fp = tmp_path / "config_run.json" + target_config_fp.touch() + pipe_config_fp = tmp_path / "config_pipe.json" with open(pipe_config_fp, "w") as config_file: json.dump(SAMPLE_CONFIG, config_file) - assert not target_config_fp.exists() pipe = pipeline_command({}) if _can_run_background(): assert "background" in [opt.name for opt in pipe.params] @@ -100,7 +104,8 @@ def test_pipeline_command( if not extra_args: cli_runner.invoke(pipe, ["-c", pipe_config_fp.as_posix()]) - assert target_config_fp.exists() + with open(target_config_fp, "r") as config: + assert json.load(config) == SUCCESS_CONFIG assert_message_was_logged("Pipeline job", "INFO") assert_message_was_logged("is complete.", "INFO") @@ -164,7 +169,7 @@ def test_ppl_command_no_config_arg( target_config_fp = tmp_cwd / "config_run.json" pipe_config_fp = tmp_cwd / "config_pipeline.json" - assert not target_config_fp.exists() + target_config_fp.touch() assert not pipe_config_fp.exists() pipe = pipeline_command({}) result = cli_runner.invoke(pipe) @@ -176,7 +181,8 @@ def test_ppl_command_no_config_arg( json.dump(SAMPLE_CONFIG, config_file) cli_runner.invoke(pipe) - assert target_config_fp.exists() + with open(target_config_fp, "r") as config: + assert json.load(config) == SUCCESS_CONFIG cli_runner.invoke(pipe) assert_message_was_logged("Pipeline job", "INFO") diff --git a/tests/cli/test_cli_preprocesing.py b/tests/cli/test_cli_preprocesing.py index dfb374aa..02e2c470 100644 --- a/tests/cli/test_cli_preprocesing.py +++ b/tests/cli/test_cli_preprocesing.py @@ -84,6 +84,9 @@ def test_preprocess_collect_config_pipeline_input(tmp_path): with open(config_fp, "w") as file_: json.dump(SAMPLE_CONFIG, file_) + (tmp_path / "config.json").touch() + (tmp_path / "collect_config.json").touch() + Pipeline(config_fp) job_files = [ diff --git 
a/tests/test_pipeline.py b/tests/test_pipeline.py index de7578a7..1e24112f 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -38,8 +38,8 @@ SAMPLE_CONFIG = { "logging": {"log_level": "INFO"}, "pipeline": [ - {"run": "./config.json"}, - {"collect-run": "./collect_config.json"}, + {"run": "./config.{config_type}"}, + {"collect-run": "./collect_config.{config_type}"}, ], } @@ -53,17 +53,31 @@ def submit_call_cache(): @pytest.fixture def sample_config_fp_type(tmp_path): """Generate a sample config type and corresponding fp.""" - config_type = random.choice(list(ConfigType)) + types = list(ConfigType) + config_type = random.choice(types) config_fp = tmp_path / f"pipe_config.{config_type}" - return config_type, config_fp + + sample_config = { + "logging": {"log_level": "INFO"}, + "pipeline": [ + {"run": (tmp_path / "config.json").as_posix()}, + {"collect-run": (tmp_path / "collect_config.json").as_posix()}, + ], + } + return config_type, config_fp, sample_config @pytest.fixture def sample_pipeline_config(sample_config_fp_type): """Write a sample pipeline config for use in tests.""" - config_type, config_fp = sample_config_fp_type + config_type, config_fp, sample_config = sample_config_fp_type with open(config_fp, "w") as file_: - config_type.dump(SAMPLE_CONFIG, file_) + config_type.dump(sample_config, file_) + + for step_dict in sample_config["pipeline"]: + for step_config_fp in step_dict.values(): + Path(step_config_fp).touch() + return config_fp @@ -99,9 +113,28 @@ def test_pipeline_init_bad_config(tmp_path): config_fp = tmp_path / "pipe_config.json" with open(config_fp, "w") as file_: json.dump({}, file_) - with pytest.raises(gapsConfigError): + + with pytest.raises(gapsConfigError) as exc_info: + Pipeline(config_fp) + + assert "Could not find required key" in str(exc_info) + + with open(config_fp, "w") as file_: + json.dump({"pipeline": "./run_config.json"}, file_) + + with pytest.raises(gapsConfigError) as exc_info: + Pipeline(config_fp) + + assert "must be a list" in str(exc_info) + + with open(config_fp, "w") as file_: + json.dump({"pipeline": [{"run": "./dne_config.json"}]}, file_) + + with pytest.raises(gapsConfigError) as exc_info: Pipeline(config_fp) + assert "depends on non-existent file" in str(exc_info) + def test_pipeline_init(sample_pipeline_config, assert_message_was_logged): """Test initializing Pipeline.""" @@ -156,6 +189,9 @@ def test_pipeline_submit(tmp_path, runnable_pipeline): with open(config_fp, "w") as file_: config_type.dump(sample_config, file_) + for fn in ["config.json", "collect_config.json", "dne_config.json"]: + (tmp_path / fn).touch() + pipeline = Pipeline(config_fp) pipeline._submit(0) assert runnable_pipeline[-1] == (tmp_path / "config.json").as_posix() @@ -617,7 +653,7 @@ def test_parse_previous_status(sample_pipeline_config): with pytest.warns(gapsWarning): assert not parse_previous_status(sample_pipeline_config.parent, "run") - assert len(list(sample_pipeline_config.parent.glob("*"))) == 2 + assert len(list(sample_pipeline_config.parent.glob("*"))) == 4 Status.make_single_job_file( sample_pipeline_config.parent, command="run", @@ -638,13 +674,13 @@ def test_parse_previous_status(sample_pipeline_config): StatusField.OUT_FILE: ["another_test.h5", "a_third.h5"], }, ) - assert len(list(sample_pipeline_config.parent.glob("*"))) == 4 + assert len(list(sample_pipeline_config.parent.glob("*"))) == 6 out_files = parse_previous_status( sample_pipeline_config.parent, "collect-run" ) assert set(out_files) == {"test.h5", "another_test.h5", 
"a_third.h5"} - assert len(list(sample_pipeline_config.parent.glob("*"))) == 4 + assert len(list(sample_pipeline_config.parent.glob("*"))) == 6 ids = parse_previous_status( sample_pipeline_config.parent, "collect-run", key=StatusField.JOB_ID ) From bdd31177d6622606c808118f33df07dcbc9eb1a3 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 1 Aug 2023 18:47:35 -0600 Subject: [PATCH 23/61] Add `split_keys=None` test --- tests/cli/test_cli_config.py | 58 ++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/cli/test_cli_config.py b/tests/cli/test_cli_config.py index c2ba406a..5d3e11e5 100644 --- a/tests/cli/test_cli_config.py +++ b/tests/cli/test_cli_config.py @@ -374,6 +374,64 @@ def _test_kickoff(ctx, cmd, **kwargs): ) +@pytest.mark.parametrize("test_class", [False, True]) +def test_run_no_split_keys(test_ctx, runnable_script, monkeypatch, test_class): + """Test the `run` function with no split keys specified.""" + + tmp_path = test_ctx.obj["TMP_PATH"] + + if test_class: + command_config = CLICommandFromClass( + TestCommand, + "run", + name="run", + split_keys=None, + ) + else: + command_config = CLICommandFromFunction( + _testing_function, + name="run", + split_keys=None, + ) + + config = { + "execution_control": { + "option": "eagle", + "allocation": "test", + "walltime": 1, + "max_workers": 1, + }, + "input1": 1, + "input2": 7, + "input3": 8, + "_z_0": ["unsorted", "strings"], + "project_points": [0, 1, 2, 4], + } + + config_fp = tmp_path / "config.json" + with open(config_fp, "w") as config_file: + json.dump(config, config_file) + + job_names_cache = {} + + def _test_kickoff(ctx, cmd, **kwargs): + job_names_cache[ctx.obj["NAME"]] = cmd + + monkeypatch.setattr( + gaps.cli.execution, "_kickoff_hpc_job", _test_kickoff, raising=True + ) + + assert len(job_names_cache) == 0 + from_config(config_fp, command_config) + assert len(job_names_cache) == 1 + assert len(set(job_names_cache)) == 1 + + for job_name, script in job_names_cache.items(): + assert "_j0" not in job_name + assert "[0, 1, 2, 4]" in script + assert '["unsorted", "strings"]' in script + + @pytest.mark.parametrize("test_class", [False, True]) @pytest.mark.parametrize("num_nodes", [30, 100]) def test_warning_about_au_usage( From bfa508abc13357f373aacb38eb48feeae5bce65b Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 1 Aug 2023 19:02:08 -0600 Subject: [PATCH 24/61] Added test and docs for empty split key input --- gaps/cli/command.py | 22 +++++++++---- tests/cli/test_cli_config.py | 60 ++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 6 deletions(-) diff --git a/gaps/cli/command.py b/gaps/cli/command.py index 675349a5..0c0c07eb 100644 --- a/gaps/cli/command.py +++ b/gaps/cli/command.py @@ -235,9 +235,14 @@ def __init__( "paired" keys match. To allow non-iterable user input for split keys, use the ``config_preprocessor`` argument to specify a preprocessing function that converts the user - input into a list of the expected inputs. If ``None``, - execution is not split across nodes, and a single node is - always used for the function call. By default, ``None``. + input into a list of the expected inputs. If users specify + an empty list or ``None`` for a key in ``split_keys``, then + GAPs will pass ``None`` as the value for that key (i.e. if + ``split_keys=["a"]`` and users specify ``"a": []`` in their + config, then the ``function`` will be called with + ``a=None``). If ``None``, execution is not split across + nodes, and a single node is always used for the function + call. 
By default, ``None``. config_preprocessor : callable, optional Optional function for configuration pre-processing. The preprocessing step occurs before jobs are split across HPC @@ -535,9 +540,14 @@ def __init__( "paired" keys match. To allow non-iterable user input for split keys, use the ``config_preprocessor`` argument to specify a preprocessing function that converts the user - input into a list of the expected inputs. If ``None``, - execution is not split across nodes, and a single node is - always used for the function call. By default, ``None``. + input into a list of the expected inputs. If users specify + an empty list or ``None`` for a key in ``split_keys``, then + GAPs will pass ``None`` as the value for that key (i.e. if + ``split_keys=["a"]`` and users specify ``"a": []`` in their + config, then the ``function`` will be called with + ``a=None``). If ``None``, execution is not split across + nodes, and a single node is always used for the function + call. By default, ``None``. config_preprocessor : callable, optional Optional function for configuration pre-processing. The preprocessing step occurs before jobs are split across HPC diff --git a/tests/cli/test_cli_config.py b/tests/cli/test_cli_config.py index 5d3e11e5..f0718084 100644 --- a/tests/cli/test_cli_config.py +++ b/tests/cli/test_cli_config.py @@ -432,6 +432,66 @@ def _test_kickoff(ctx, cmd, **kwargs): assert '["unsorted", "strings"]' in script +@pytest.mark.parametrize("test_class", [False, True]) +def test_run_empty_split_keys( + test_ctx, runnable_script, monkeypatch, test_class +): + """Test the `run` function with empty split keys input.""" + + tmp_path = test_ctx.obj["TMP_PATH"] + + if test_class: + command_config = CLICommandFromClass( + TestCommand, + "run", + name="run", + split_keys=["_z_0"], + ) + else: + command_config = CLICommandFromFunction( + _testing_function, + name="run", + split_keys=["_z_0"], + ) + + config = { + "execution_control": { + "option": "eagle", + "allocation": "test", + "walltime": 1, + "max_workers": 1, + }, + "input1": 1, + "input2": 7, + "input3": 8, + "_z_0": [], + "project_points": [0, 1, 2, 4], + } + + config_fp = tmp_path / "config.json" + with open(config_fp, "w") as config_file: + json.dump(config, config_file) + + job_names_cache = {} + + def _test_kickoff(ctx, cmd, **kwargs): + job_names_cache[ctx.obj["NAME"]] = cmd + + monkeypatch.setattr( + gaps.cli.execution, "_kickoff_hpc_job", _test_kickoff, raising=True + ) + + assert len(job_names_cache) == 0 + from_config(config_fp, command_config) + assert len(job_names_cache) == 1 + assert len(set(job_names_cache)) == 1 + + for job_name, script in job_names_cache.items(): + assert "_j0" not in job_name + assert "[0, 1, 2, 4]" in script + assert '"_z_0": None' in script + + @pytest.mark.parametrize("test_class", [False, True]) @pytest.mark.parametrize("num_nodes", [30, 100]) def test_warning_about_au_usage( From 1acf53cf6fcb2c2e21632af7667579c5ac44324d Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 3 Aug 2023 14:02:12 -0600 Subject: [PATCH 25/61] Log messages no longer propagate to root logger --- gaps/__init__.py | 1 + tests/conftest.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/gaps/__init__.py b/gaps/__init__.py index a078140d..2d66ae1f 100644 --- a/gaps/__init__.py +++ b/gaps/__init__.py @@ -15,3 +15,4 @@ logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) logger.setLevel("DEBUG") +logger.propagate = False diff --git a/tests/conftest.py b/tests/conftest.py index 
821f5687..bba0b120 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,13 +10,19 @@ import pytest from click.testing import CliRunner -from gaps import TEST_DATA_DIR +from gaps import TEST_DATA_DIR, logger from gaps.collection import find_h5_files LOGGING_META_FILES = {"log.py", "exceptions.py", "warnings.py"} +@pytest.fixture(autouse=True) +def include_logging(): + """Make the gaps logger propagate calls to the root.""" + logger.propagate = True + + @pytest.fixture def assert_message_was_logged(caplog): """Assert that a particular (partial) message was logged.""" From 1b08c1dedf2164f21944b33b819869eab77920b3 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 3 Aug 2023 14:02:33 -0600 Subject: [PATCH 26/61] More detailed logger print statement --- gaps/log.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/gaps/log.py b/gaps/log.py index 1d483d96..ded22726 100644 --- a/gaps/log.py +++ b/gaps/log.py @@ -190,13 +190,16 @@ def print_logging_info(): logger_names = [__name__.split(".", maxsplit=1)[0]] for name in logger_names: print(f"LOGGER: {name!r}") - log_to_debug = logging.getLogger(name) - while log_to_debug is not None: + logger_to_debug = logging.getLogger(name) + while logger_to_debug is not None: print( - f"level: {log_to_debug.level}, name: {log_to_debug.name}," - f"handlers: {log_to_debug.handlers}" + f"level: {logger_to_debug.level}, " + f"name: {logger_to_debug.name}, " + f"handlers: " ) - log_to_debug = log_to_debug.parent + for handler in logger_to_debug.handlers: + print(f"\t +++ {handler.name} - {handler}") + logger_to_debug = logger_to_debug.parent def print_logging_info_all_libraries(): From 102306d3bf444c36469093f316ebbfa3b87f5196 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 3 Aug 2023 17:00:27 -0600 Subject: [PATCH 27/61] Updated variable name to `memory_utilization_limit` --- gaps/cli/command.py | 26 ++++++++++++++------------ gaps/cli/documentation.py | 2 +- gaps/collection.py | 24 ++++++++++++------------ tests/cli/test_cli_documentation.py | 20 ++++++++++---------- tests/test_collection.py | 4 +++- 5 files changed, 40 insertions(+), 36 deletions(-) diff --git a/gaps/cli/command.py b/gaps/cli/command.py index 0c0c07eb..8cdb95f1 100644 --- a/gaps/cli/command.py +++ b/gaps/cli/command.py @@ -161,12 +161,13 @@ def __init__( value. .. WARNING:: The keywords ``{"max-workers", - "sites_per_worker", "mem_util_lim", "timeout", - "pool_size"}`` are assumed to describe - execution control. If you request any of these - as function arguments, users of your CLI will - specify them in the `execution_control` block - of the input config file. + "sites_per_worker", "memory_utilization_limit", + "timeout", "pool_size"}`` are assumed to + describe execution control. If you request any + of these as function arguments, users of your + CLI will specify them in the + `execution_control` block of the input config + file. Note that the ``config`` parameter is not allowed as a function signature item. Please request all the required @@ -467,12 +468,13 @@ def __init__( value. .. WARNING:: The keywords ``{"max-workers", - "sites_per_worker", "mem_util_lim", "timeout", - "pool_size"}`` are assumed to describe - execution control. If you request any of these - as function arguments, users of your CLI will - specify them in the `execution_control` block - of the input config file. + "sites_per_worker", "memory_utilization_limit", + "timeout", "pool_size"}`` are assumed to + describe execution control. 
If you request any + of these as function arguments, users of your + CLI will specify them in the + `execution_control` block of the input config + file. Note that the ``config`` parameter is not allowed as a function signature item. Please request all the required diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 91baf52b..28b2d765 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -28,7 +28,7 @@ EXTRA_EXEC_PARAMS = { "max_workers": "Maximum number of parallel workers run on each node.", "sites_per_worker": "Number of sites to run in series on a worker.", - "mem_util_lim": ( + "memory_utilization_limit": ( "Memory utilization limit (fractional). Must be a value between 0 " "and 100. This input sets how much data will be stored in-memory " "at any given time before flushing to disk." diff --git a/gaps/collection.py b/gaps/collection.py index 8f5e925f..813ba132 100644 --- a/gaps/collection.py +++ b/gaps/collection.py @@ -56,7 +56,7 @@ def __init__( gids, dataset_in, dataset_out=None, - mem_util_lim=0.7, + memory_utilization_limit=0.7, pass_through=False, ): """ @@ -74,7 +74,7 @@ def __init__( Name of dataset into which collected data is to be written. If `None` the name of the output dataset is assumed to match the dataset input name. By default, `None`. - mem_util_lim : float, optional + memory_utilization_limit : float, optional Memory utilization limit (fractional). This sets how many sites will be collected at a time. By default, `0.7`. pass_through : bool, optional @@ -96,7 +96,7 @@ def __init__( self._dataset_out = dataset_out tot_mem = psutil.virtual_memory().total - self._mem_avail = mem_util_lim * tot_mem + self._mem_avail = memory_utilization_limit * tot_mem self._axis, self._site_mem_req = self._pre_collect() logger.debug( @@ -362,7 +362,7 @@ def collect_dataset( gids, dataset_in, dataset_out=None, - mem_util_lim=0.7, + memory_utilization_limit=0.7, pass_through=False, ): """Collect a dataset from multiple source files into a single file. @@ -381,7 +381,7 @@ def collect_dataset( Name of dataset into which collected data is to be written. If `None` the name of the output dataset is assumed to match the dataset input name. By default, `None`. - mem_util_lim : float, optional + memory_utilization_limit : float, optional Memory utilization limit (fractional). This sets how many sites will be collected at a time. By default, `0.7`. pass_through : bool, optional @@ -395,7 +395,7 @@ def collect_dataset( gids, dataset_in, dataset_out=dataset_out, - mem_util_lim=mem_util_lim, + memory_utilization_limit=memory_utilization_limit, pass_through=pass_through, ) collector._collect() @@ -623,7 +623,7 @@ def collect( self, dataset_in, dataset_out=None, - mem_util_lim=0.7, + memory_utilization_limit=0.7, pass_through=False, ): """Collect a dataset from h5_dir to h5_file @@ -637,7 +637,7 @@ def collect( Name of dataset into which collected data is to be written. If `None` the name of the output dataset is assumed to match the dataset input name. By default, `None`. - mem_util_lim : float + memory_utilization_limit : float Memory utilization limit (fractional). This sets how many sites will be collected at a time. By default, `0.7`. 
pass_through : bool @@ -670,7 +670,7 @@ def collect( self.gids, dataset_in, dataset_out=dataset_out, - mem_util_lim=mem_util_lim, + memory_utilization_limit=memory_utilization_limit, pass_through=pass_through, ) @@ -687,7 +687,7 @@ def add_dataset( collect_pattern, dataset_in, dataset_out=None, - mem_util_lim=0.7, + memory_utilization_limit=0.7, pass_through=False, ): """Collect and add a dataset to a single HDF5 file. @@ -708,7 +708,7 @@ def add_dataset( Name of dataset into which collected data is to be written. If `None` the name of the output dataset is assumed to match the dataset input name. By default, `None`. - mem_util_lim : float + memory_utilization_limit : float Memory utilization limit (fractional). This sets how many sites will be collected at a time. By default, `0.7`. pass_through : bool @@ -735,7 +735,7 @@ def add_dataset( collector.collect( dataset_in, dataset_out=dataset_out, - mem_util_lim=mem_util_lim, + memory_utilization_limit=memory_utilization_limit, pass_through=pass_through, ) diff --git a/tests/cli/test_cli_documentation.py b/tests/cli/test_cli_documentation.py index 16f7dc11..0cb6e03f 100644 --- a/tests/cli/test_cli_documentation.py +++ b/tests/cli/test_cli_documentation.py @@ -36,7 +36,7 @@ def test_command_documentation_copies_skip_params(): def test_command_documentation_extra_exec_params(): """Test the `CommandDocumentation` with extra exec params.""" - def func(max_workers, sites_per_worker, mem_util_lim, timeout, pool_size): + def func(max_workers, sites_per_worker, memory_utilization_limit, timeout, pool_size): """A short description. Parameters @@ -45,7 +45,7 @@ def func(max_workers, sites_per_worker, mem_util_lim, timeout, pool_size): Number of workers to run. sites_per_worker : float Number of sites to run. - mem_util_lim : str + memory_utilization_limit : str A test documentation. timeout : dict A timeout value. @@ -56,7 +56,7 @@ def func(max_workers, sites_per_worker, mem_util_lim, timeout, pool_size): expected_parameters = [ "max_workers", "sites_per_worker", - "mem_util_lim", + "memory_utilization_limit", "timeout", "pool_size", ] @@ -86,13 +86,13 @@ def func(max_workers, sites_per_worker, mem_util_lim, timeout, pool_size): def test_command_documentation_extra_exec_params_no_user_doc(): """Test the `CommandDocumentation` with extra exec params no user doc.""" - def func(max_workers, sites_per_worker, mem_util_lim, timeout, pool_size): + def func(max_workers, sites_per_worker, memory_utilization_limit, timeout, pool_size): """A short description.""" expected_parameters = [ "max_workers", "sites_per_worker", - "mem_util_lim", + "memory_utilization_limit", "timeout", "pool_size", ] @@ -115,7 +115,7 @@ def test_command_documentation_extra_exec_params_user_defaults(): def func( max_workers=2, sites_per_worker=0.4, - mem_util_lim="test", + memory_utilization_limit="test", timeout=None, pool_size=None, ): @@ -127,7 +127,7 @@ def func( Number of workers to run. By default, ``2``. sites_per_worker : float, optional Number of sites to run. By default, ``0.4``. - mem_util_lim : str, optional + memory_utilization_limit : str, optional A test documentation. By default, ``"test"``. timeout : dict, optional A timeout value. By default, ``None``. 
@@ -138,7 +138,7 @@ def func( expected_parameters = [ "max_workers", "sites_per_worker", - "mem_util_lim", + "memory_utilization_limit", "timeout", "pool_size", ] @@ -180,7 +180,7 @@ def test_command_documentation_extra_exec_params_defaults_no_docs(): def func( max_workers=2, sites_per_worker=0.4, - mem_util_lim="test", + memory_utilization_limit="test", timeout=None, pool_size=None, ): @@ -189,7 +189,7 @@ def func( expected_parameters = [ "max_workers", "sites_per_worker", - "mem_util_lim", + "memory_utilization_limit", "timeout", "pool_size", ] diff --git a/tests/test_collection.py b/tests/test_collection.py index d3918513..a7864515 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -372,7 +372,9 @@ def test_low_mem_collect(tmp_path, collect_pattern, points_path): collect_dir, pattern = collect_pattern ctc = Collector(h5_file, collect_dir / pattern, points_path) - ctc.collect("cf_profile", dataset_out=None, mem_util_lim=0.00002) + ctc.collect( + "cf_profile", dataset_out=None, memory_utilization_limit=0.00002 + ) with h5py.File(h5_file, "r") as collected_outputs: assert "cf_profile" in collected_outputs From 63cb9a24e37118796c02a450b879f54c6edffb6f Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 3 Aug 2023 17:16:35 -0600 Subject: [PATCH 28/61] Updated pipeline log docs --- gaps/cli/documentation.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 28b2d765..d0aed8e8 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -133,7 +133,13 @@ command. logging : dict, optional Dictionary containing keyword-argument pairs to pass to - :func:`gaps.log.init_logger`. + `init_logger `_. This initializes + logging for the submission portion of the pipeline. Note, however, + that each step (command) will **also** record the submission step + log output to a common "project" log file, so it's only ever + necessary to use this input if you want a different (lower) level of + verbosity than the `log_level` specified in the config for the step + of the pipeline being executed. 
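
For reference, the pipeline configs exercised in the test suite pair this
``logging`` input with the ``pipeline`` step list. The sketch below writes
such a config the same way the tests do; the step config file paths are
placeholders::

    import json

    pipeline_config = {
        # Only affects verbosity of the submission portion of the pipeline;
        # each step's own "log_level" still governs that step's logs.
        "logging": {"log_level": "DEBUG"},
        "pipeline": [
            {"run": "./config_run.json"},
            {"collect-run": "./config_collect_run.json"},
        ],
    }

    with open("./config_pipeline.json", "w") as config_file:
        json.dump(pipeline_config, config_file, indent=4)
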
""" From e66907c5338672511b8f377ba837812ff2a00e45 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Fri, 4 Aug 2023 11:11:19 -0600 Subject: [PATCH 29/61] Fix bug with stdout not returning properly --- gaps/hpc.py | 4 ++-- tests/test_hpc.py | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/gaps/hpc.py b/gaps/hpc.py index 7649585d..bf0df633 100644 --- a/gaps/hpc.py +++ b/gaps/hpc.py @@ -256,7 +256,7 @@ def _teardown_submission(self, name, out, err, keep_sh=False): def _mark_job_as_submitted(self, name, out): """Mark job as submitted in the queue.""" - job_id = int(out.split(" ")[-1]) + job_id = int(_job_id_or_out(out).split(" ")[-1]) out = str(job_id) logger.debug("Job %r with id #%s submitted successfully", name, job_id) self._queue[job_id] = { @@ -655,7 +655,7 @@ def submit(cmd, background=False, background_stdout=False): return "", "" stdout, stderr = _subprocess_popen(cmd) - return _job_id_or_out(stdout), stderr + return stdout, stderr def format_walltime(hours=None): diff --git a/tests/test_hpc.py b/tests/test_hpc.py index 4f31dab7..2209fd38 100644 --- a/tests/test_hpc.py +++ b/tests/test_hpc.py @@ -286,6 +286,7 @@ def test_submit(monkeypatch): cmd = "python -c \"print('hello world')\"" cmd_id = "python -c \"print('Job ID is 12342')\"" cmd_err = "python -c \"raise ValueError('An error occurred')\"" + cmd_squeue = f'python -c "print({SQUEUE_RAW!r})"' out, err = submit(cmd, background=False, background_stdout=False) assert out == "hello world" @@ -296,7 +297,11 @@ def test_submit(monkeypatch): assert not err out, err = submit(cmd_id, background=False, background_stdout=False) - assert out == "12342" + assert out == "Job ID is 12342" + assert not err + + out, err = submit(cmd_squeue, background=False, background_stdout=False) + assert out == SQUEUE_RAW.replace("\n", "\r\n") assert not err with pytest.raises(OSError): From 7507930b6fc7d7226a32e3176ca1b3436d0703ab Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Fri, 4 Aug 2023 12:38:20 -0600 Subject: [PATCH 30/61] Update documentation --- gaps/pipeline.py | 6 ++++++ gaps/status.py | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index fb31ab37..6fb58b4f 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -311,6 +311,12 @@ def _sort_key(status_entry): def parse_previous_status(status_dir, command, key=StatusField.OUT_FILE): """Parse key from job status(es) from the previous pipeline step. + This command DOES NOT check the HPC queue for jobs and therefore + DOES NOT update the status of previously running jobs that have + errored out of the HPC queue. For best results, ensure that all + previous steps of a pipeline have finished processing before calling + this function. + Parameters ---------- status_dir : path-like diff --git a/gaps/status.py b/gaps/status.py index 2dfa8860..8f0bbc89 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -587,6 +587,10 @@ def parse_command_status( ): """Parse key from job status(es) from the given command. + This command DOES NOT check the HPC queue for jobs and therefore + DOES NOT update the status of previously running jobs that have + errored out of the HPC queue. 
+ Parameters ---------- status_dir : path-like From c79e175681e15e388d60d71dd92422db78690f78 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 5 Aug 2023 14:50:00 -0600 Subject: [PATCH 31/61] Major update with multiple bug fixes --- gaps/cli/execution.py | 6 +-- gaps/legacy.py | 28 ++++++++++ gaps/pipeline.py | 2 +- gaps/status.py | 84 ++++++++++++++++++++--------- tests/cli/test_cli_config.py | 94 ++++++++++++++------------------- tests/cli/test_cli_execution.py | 79 ++++++++++++++++++++++++++- tests/cli/test_cli_status.py | 84 +++++++++++++++++++++++++++++ tests/test_hpc.py | 30 +++++++++-- tests/test_legacy.py | 34 +++++++++++- tests/test_pipeline.py | 2 +- tests/test_status.py | 54 +++++++++++++++---- 11 files changed, 397 insertions(+), 100 deletions(-) diff --git a/gaps/cli/execution.py b/gaps/cli/execution.py index e3e53c6c..eb0ba910 100644 --- a/gaps/cli/execution.py +++ b/gaps/cli/execution.py @@ -61,7 +61,7 @@ def kickoff_job(ctx, cmd, exec_kwargs): exec_kwargs = _filter_exec_kwargs( exec_kwargs, hardware_option.manager.make_script_str, hardware_option ) - _kickoff_hpc_job(ctx, cmd, **exec_kwargs) + _kickoff_hpc_job(ctx, cmd, hardware_option, **exec_kwargs) def _filter_exec_kwargs(kwargs, func, hardware_option): @@ -138,7 +138,7 @@ def _kickoff_local_job(ctx, cmd): logger.info(msg) -def _kickoff_hpc_job(ctx, cmd, **kwargs): +def _kickoff_hpc_job(ctx, cmd, hardware_option, **kwargs): """Run a job (command) on the HPC.""" if not _should_run(ctx): @@ -159,7 +159,7 @@ def _kickoff_hpc_job(ctx, cmd, **kwargs): replace=True, job_attrs={ StatusField.JOB_ID: out, - StatusField.HARDWARE: HardwareOption.EAGLE, + StatusField.HARDWARE: hardware_option, StatusField.QOS: kwargs.get("qos") or QOSOption.UNSPECIFIED, StatusField.JOB_STATUS: StatusOption.SUBMITTED, StatusField.TIME_SUBMITTED: dt.datetime.now().strftime(DT_FMT), diff --git a/gaps/legacy.py b/gaps/legacy.py index c4be6613..bbeaa731 100644 --- a/gaps/legacy.py +++ b/gaps/legacy.py @@ -152,6 +152,34 @@ def make_job_file(status_dir, module, job_name, attrs): attrs=attrs, ) + @staticmethod + def _update_job_status_from_hardware(job_data, hardware_status_retriever): + """Update job status from HPC hardware if needed.""" + + # init defaults in case job/command not in status file yet + job_status = job_data.get(gaps.status.StatusField.JOB_STATUS, None) + job_id = job_data.get(gaps.status.StatusField.JOB_ID, None) + job_hardware = job_data.get(gaps.status.StatusField.HARDWARE, None) + + # get job status from hardware + current = hardware_status_retriever[job_id, job_hardware] + + # No current status and job was not successful: failed! + if ( + current is None + and job_status != gaps.status.StatusOption.SUCCESSFUL + ): + job_data[ + gaps.status.StatusField.JOB_STATUS + ] = gaps.status.StatusOption.FAILED + + # do not overwrite a successful or failed job status. 
+ elif ( + current != job_status + and job_status not in gaps.status.StatusOption.members_as_str() + ): + job_data[gaps.status.StatusField.JOB_STATUS] = current + # pylint: disable=no-member,invalid-name,super-init-not-called class Pipeline(gaps.pipeline.Pipeline): diff --git a/gaps/pipeline.py b/gaps/pipeline.py index 6fb58b4f..e8c59c9e 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -142,7 +142,7 @@ def _status(self, step): status = self.status submitted = _check_jobs_submitted(status, command) if not submitted: - return StatusOption.RUNNING + return StatusOption.NOT_SUBMITTED return self._get_command_return_code(status, command) diff --git a/gaps/status.py b/gaps/status.py index 8f0bbc89..bc08febe 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -214,7 +214,7 @@ def as_df(self, commands=None, index_name="job_name", include_cols=None): include_cols = include_cols or [] output_cols = self._DF_COLUMNS + list(include_cols) - self.update_from_all_job_files(purge=False) + self.update_from_all_job_files(purge=False, check_hardware=True) if not self.data: return pd.DataFrame(columns=output_cols) @@ -235,6 +235,7 @@ def as_df(self, commands=None, index_name="job_name", include_cols=None): continue command_df[f"{StatusField.PIPELINE_INDEX}"] = command_index commands.append(command_df) + try: command_df = pd.concat(commands, sort=False) except ValueError: @@ -274,7 +275,7 @@ def dump(self): backup.unlink(missing_ok=True) - def update_from_all_job_files(self, purge=True): + def update_from_all_job_files(self, check_hardware=False, purge=True): """Update status from all single-job job status files. This method loads all single-job status files in the target @@ -283,9 +284,14 @@ def update_from_all_job_files(self, purge=True): Parameters ---------- + check_hardware : bool, optional + Option to check hardware status for job failures for jobs + with a "running" status. This is useful because the + "running" status may not be correctly updated if the job + terminates abnormally on the HPC. By default, `False`. purge : bool, optional Option to purge the individual status files. - By default,`True`. + By default, `True`. Returns ------- @@ -302,11 +308,48 @@ def update_from_all_job_files(self, purge=True): monitor_pid_info = _safe_load(monitor_pid_file, purge=purge) self.data.update(monitor_pid_info) + if check_hardware: + self._update_from_hardware() + if purge: self.dump() return self + def _update_from_hardware(self): + """Check all job status against hardware status.""" + hardware_status_retriever = HardwareStatusRetriever() + for job_data in self._job_statuses(): + self._update_job_status_from_hardware( + job_data, hardware_status_retriever + ) + + def _job_statuses(self): + """Iterate over job status dicts. Ignore other info in self.data""" + for status in self.values(): + try: + yield from _iter_job_status(status) + except AttributeError: + continue + + @staticmethod + def _update_job_status_from_hardware(job_data, hardware_status_retriever): + """Update job status to failed if it is processing but DNE on HPC.""" + + if not StatusOption( + job_data.get(StatusField.JOB_STATUS, StatusOption.NOT_SUBMITTED) + ).is_processing: + return + + job_id = job_data.get(StatusField.JOB_ID, None) + job_hardware = job_data.get(StatusField.HARDWARE, None) + + # get job status from hardware + current = hardware_status_retriever[job_id, job_hardware] + # No current status and job was not successful: failed! 
+ if current is None: + job_data[StatusField.JOB_STATUS] = StatusOption.FAILED + def update_job_status( self, command, job_name, hardware_status_retriever=None ): @@ -334,37 +377,20 @@ def update_job_status( # Update status data dict and file if job file was found if current is not None: self.data = recursively_update_dict(self.data, current) - self.dump() # check job status via hardware if job file not found. elif command in self.data: # job exists if job_name in self.data[command]: - job_data = self.data[command][job_name] - - # init defaults in case job/command not in status file yet - previous = job_data.get(StatusField.JOB_STATUS, None) - job_id = job_data.get(StatusField.JOB_ID, None) - job_hardware = job_data.get(StatusField.HARDWARE, None) - - # get job status from hardware - current = hardware_status_retriever[job_id, job_hardware] - - # No current status and job was not successful: failed! - if current is None and previous != StatusOption.SUCCESSFUL: - job_data[StatusField.JOB_STATUS] = StatusOption.FAILED - - # do not overwrite a successful or failed job status. - elif ( - current != previous - and previous not in StatusOption.members_as_str() - ): - job_data[StatusField.JOB_STATUS] = current - + self._update_job_status_from_hardware( + self.data[command][job_name], hardware_status_retriever + ) # job does not yet exist else: self.data[command][job_name] = {} + self.dump() + def _retrieve_job_status( self, command, job_name, hardware_status_retriever ): @@ -802,3 +828,11 @@ def _validate_hardware(hardware): f"Available options are: {HardwareOption.members_as_str()}." ) raise gapsKeyError(msg) from err + + +def _iter_job_status(status): + """Iterate over job status dictionary.""" + for job_status in status.values(): + if not isinstance(job_status, dict): + continue + yield job_status diff --git a/tests/cli/test_cli_config.py b/tests/cli/test_cli_config.py index f0718084..a39ff673 100644 --- a/tests/cli/test_cli_config.py +++ b/tests/cli/test_cli_config.py @@ -201,6 +201,20 @@ def runnable_script(): _CMD_LIST.pop(0) +@pytest.fixture +def job_names_cache(monkeypatch): + """Monkeypatch `_kickoff_hpc_job` and cache call""" + cache = {} + + def _test_kickoff(ctx, cmd, __, **kwargs): + cache[ctx.obj["NAME"]] = cmd + + monkeypatch.setattr( + gaps.cli.execution, "_kickoff_hpc_job", _test_kickoff, raising=True + ) + return cache + + @pytest.mark.parametrize( ("extra_input", "none_list"), [ @@ -310,7 +324,7 @@ def pre_processing(config, a_value, a_multiplier): @pytest.mark.parametrize("test_class", [False, True]) def test_run_multiple_nodes( - test_ctx, runnable_script, monkeypatch, test_class + test_ctx, runnable_script, monkeypatch, test_class, job_names_cache ): """Test the `run` function calls `_kickoff_hpc_job` for multiple nodes.""" @@ -349,15 +363,6 @@ def test_run_multiple_nodes( with open(config_fp, "w") as config_file: json.dump(config, config_file) - job_names_cache = {} - - def _test_kickoff(ctx, cmd, **kwargs): - job_names_cache[ctx.obj["NAME"]] = cmd - - monkeypatch.setattr( - gaps.cli.execution, "_kickoff_hpc_job", _test_kickoff, raising=True - ) - assert len(job_names_cache) == 0 from_config(config_fp, command_config) assert len(job_names_cache) == 4 @@ -375,7 +380,9 @@ def _test_kickoff(ctx, cmd, **kwargs): @pytest.mark.parametrize("test_class", [False, True]) -def test_run_no_split_keys(test_ctx, runnable_script, monkeypatch, test_class): +def test_run_no_split_keys( + test_ctx, runnable_script, monkeypatch, test_class, job_names_cache +): """Test the `run` function with 
no split keys specified.""" tmp_path = test_ctx.obj["TMP_PATH"] @@ -412,15 +419,6 @@ def test_run_no_split_keys(test_ctx, runnable_script, monkeypatch, test_class): with open(config_fp, "w") as config_file: json.dump(config, config_file) - job_names_cache = {} - - def _test_kickoff(ctx, cmd, **kwargs): - job_names_cache[ctx.obj["NAME"]] = cmd - - monkeypatch.setattr( - gaps.cli.execution, "_kickoff_hpc_job", _test_kickoff, raising=True - ) - assert len(job_names_cache) == 0 from_config(config_fp, command_config) assert len(job_names_cache) == 1 @@ -434,7 +432,7 @@ def _test_kickoff(ctx, cmd, **kwargs): @pytest.mark.parametrize("test_class", [False, True]) def test_run_empty_split_keys( - test_ctx, runnable_script, monkeypatch, test_class + test_ctx, runnable_script, monkeypatch, test_class, job_names_cache ): """Test the `run` function with empty split keys input.""" @@ -472,15 +470,6 @@ def test_run_empty_split_keys( with open(config_fp, "w") as config_file: json.dump(config, config_file) - job_names_cache = {} - - def _test_kickoff(ctx, cmd, **kwargs): - job_names_cache[ctx.obj["NAME"]] = cmd - - monkeypatch.setattr( - gaps.cli.execution, "_kickoff_hpc_job", _test_kickoff, raising=True - ) - assert len(job_names_cache) == 0 from_config(config_fp, command_config) assert len(job_names_cache) == 1 @@ -494,8 +483,16 @@ def _test_kickoff(ctx, cmd, **kwargs): @pytest.mark.parametrize("test_class", [False, True]) @pytest.mark.parametrize("num_nodes", [30, 100]) +@pytest.mark.parametrize("option", ["eagle", "dne"]) def test_warning_about_au_usage( - test_ctx, runnable_script, monkeypatch, test_class, caplog, num_nodes + test_ctx, + runnable_script, + monkeypatch, + test_class, + caplog, + num_nodes, + option, + job_names_cache, ): """Test the `run` function calls `_kickoff_hpc_job` for multiple nodes.""" @@ -517,7 +514,7 @@ def test_warning_about_au_usage( config = { "execution_control": { - "option": "eagle", + "option": option, "allocation": "test", "qos": "high", "walltime": 50, @@ -532,25 +529,21 @@ def test_warning_about_au_usage( with open(config_fp, "w") as config_file: json.dump(config, config_file) - job_names_cache = {} - - def _test_kickoff(ctx, cmd, **kwargs): - job_names_cache[ctx.obj["NAME"]] = cmd - - monkeypatch.setattr( - gaps.cli.execution, "_kickoff_hpc_job", _test_kickoff, raising=True - ) - assert not caplog.records assert len(job_names_cache) == 0 - from_config(config_fp, command_config) - assert len(job_names_cache) == num_nodes - assert len(set(job_names_cache)) == num_nodes + try: + from_config(config_fp, command_config) + except ValueError: + pass + + if option == "eagle": + assert len(job_names_cache) == num_nodes + assert len(set(job_names_cache)) == num_nodes warnings = [ record for record in caplog.records if record.levelname == "WARNING" ] - if num_nodes < 33: + if num_nodes < 33 or option != "eagle": assert not warnings else: assert warnings @@ -562,7 +555,7 @@ def _test_kickoff(ctx, cmd, **kwargs): @pytest.mark.parametrize("test_class", [False, True]) def test_run_parallel_split_keys( - test_ctx, runnable_script, monkeypatch, test_class + test_ctx, runnable_script, monkeypatch, test_class, job_names_cache ): """Test the `run` function calls `_kickoff_hpc_job` for multiple nodes.""" @@ -599,15 +592,6 @@ def test_run_parallel_split_keys( with open(config_fp, "w") as config_file: json.dump(config, config_file) - job_names_cache = {} - - def _test_kickoff(ctx, cmd, **kwargs): - job_names_cache[ctx.obj["NAME"]] = cmd - - monkeypatch.setattr( - gaps.cli.execution, 
"_kickoff_hpc_job", _test_kickoff, raising=True - ) - assert len(job_names_cache) == 0 from_config(config_fp, command_config) assert len(job_names_cache) == 6 diff --git a/tests/cli/test_cli_execution.py b/tests/cli/test_cli_execution.py index 06393709..aa95d396 100644 --- a/tests/cli/test_cli_execution.py +++ b/tests/cli/test_cli_execution.py @@ -9,7 +9,7 @@ import pytest import gaps.hpc -from gaps.status import Status, StatusField, StatusOption +from gaps.status import Status, StatusField, StatusOption, HardwareOption from gaps.cli.execution import kickoff_job, _should_run from gaps.exceptions import gapsConfigError @@ -124,6 +124,7 @@ def test_kickoff_job_hpc( ): """Test kickoff command for HPC job.""" + test_ctx.obj.pop("MANAGER", None) run_dir = test_ctx.obj["TMP_PATH"] assert not list(run_dir.glob("*")) job_name = "_".join([test_ctx.obj["NAME"], str(high_qos)]) @@ -154,8 +155,9 @@ def _test_submit(cmd): assert not list(test_ctx.obj["TMP_PATH"].glob("*")) kickoff_job(test_ctx, cmd, exec_kwargs) + test_ctx.obj.pop("MANAGER", None) - assert cmd_cache + assert len(cmd_cache) == 2 assert_message_was_logged( "Found extra keys in 'execution_control'! ", "WARNING" ) @@ -183,11 +185,84 @@ def _test_submit(cmd): exec_kwargs = {"option": "eagle", "allocation": "test", "walltime": 0.43} kickoff_job(test_ctx, cmd, exec_kwargs) assert_message_was_logged("not resubmitting", "INFO") + assert len(cmd_cache) == 2 + + Status.make_single_job_file( + run_dir, + "run", + job_name, + {StatusField.JOB_STATUS: StatusOption.RUNNING}, + ) + kickoff_job(test_ctx, cmd, exec_kwargs) + assert len(cmd_cache) == 2 + + # check repeated call does not requeue HPC + kickoff_job(test_ctx, cmd, exec_kwargs) + assert len(cmd_cache) == 2 exec_kwargs = {"option": "eagle", "walltime": 0.43} with pytest.raises(gapsConfigError): kickoff_job(test_ctx, cmd, exec_kwargs) + HardwareOption.EAGLE.manager = gaps.hpc.SLURM() + test_ctx.obj.pop("MANAGER", None) + + +def test_qos_values(test_ctx, monkeypatch): + """Test kickoff command for HPC job.""" + + test_ctx.obj.pop("MANAGER", None) + run_dir = test_ctx.obj["TMP_PATH"] + assert not list(run_dir.glob("*")) + + exec_kwargs = { + "option": "eagle", + "dne_arg": 0, + "allocation": "test", + "walltime": 0.43, + "stdout_path": (test_ctx.obj["TMP_PATH"] / "stdout").as_posix(), + "qos": "dne", + } + + cmd = ( + "python -c \"import warnings; print('hello world'); " + "warnings.warn('a test warning')\"" + ) + cmd_cache = [] + + def _test_submit(cmd): + cmd_cache.append(cmd) + return "9999", None + + monkeypatch.setattr(gaps.hpc, "submit", _test_submit, raising=True) + monkeypatch.setattr( + gaps.hpc.PBS, + "_job_is_running", + lambda *__, **___: True, + raising=True, + ) + assert not cmd_cache + assert not list(test_ctx.obj["TMP_PATH"].glob("*")) + + with pytest.raises(gapsConfigError): + kickoff_job(test_ctx, cmd, exec_kwargs) + + exec_kwargs["option"] = "peregrine" + kickoff_job(test_ctx, cmd, exec_kwargs) + + status_file = list(run_dir.glob("*.json")) + assert len(status_file) == 1 + status_file = status_file[0] + assert status_file.name.endswith(".json") + + with open(status_file, "r") as status_fh: + status = json.load(status_fh) + + assert status["run"][test_ctx.obj["NAME"]][StatusField.QOS] == "dne" + + HardwareOption.EAGLE.manager = gaps.hpc.SLURM() + test_ctx.obj.pop("MANAGER", None) + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/cli/test_cli_status.py b/tests/cli/test_cli_status.py index f40ef643..5ba85519 100644 
--- a/tests/cli/test_cli_status.py +++ b/tests/cli/test_cli_status.py @@ -7,6 +7,7 @@ import psutil import pytest +from gaps.status import HardwareStatusRetriever, StatusOption from gaps.cli.status import status_command from gaps.warnings import gapsWarning @@ -37,6 +38,12 @@ def test_status(test_data_dir, cli_runner, extra_args, monkeypatch): """Test the status command.""" monkeypatch.setattr(psutil, "pid_exists", lambda *__: True, raising=True) + monkeypatch.setattr( + HardwareStatusRetriever, + "__getitem__", + lambda *__, **___: StatusOption.SUBMITTED, + raising=True, + ) status = status_command() if "dne" in extra_args: @@ -86,5 +93,82 @@ def test_status(test_data_dir, cli_runner, extra_args, monkeypatch): assert lines[-4] == "Total AUs spent: 6" +@pytest.mark.parametrize( + "extra_args", + [ + [], + "-i out_dir".split(), + "-c run".split() + "-c collect-run".split(), + "-s successful".split() + + "-s fail".split() + + "-s r".split() + + "-s pending".split() + + "-s u".split() + + "-s dne".split(), + "-c run".split() + + "-c collect-run".split() + + "-s successful".split() + + "-s fail".split() + + "-s r".split() + + "-s pending".split() + + "-s u".split() + + "-s dne".split(), + ], +) +def test_status_with_hardware_check( + test_data_dir, cli_runner, extra_args, monkeypatch +): + """Test the status command.""" + + monkeypatch.setattr(psutil, "pid_exists", lambda *__: True, raising=True) + + status = status_command() + if "dne" in extra_args: + with pytest.warns(gapsWarning): + result = cli_runner.invoke( + status, + [(test_data_dir / "test_run").as_posix()] + extra_args, + ) + else: + result = cli_runner.invoke( + status, + [(test_data_dir / "test_run").as_posix()] + extra_args, + ) + lines = result.stdout.split("\n") + cols = [ + "job_status", + "time_submitted", + "time_start", + "time_end", + "total_runtime", + "hardware", + "qos", + ] + if "out_dir" in extra_args: + cols += ["out_dir"] + cols = " ".join(cols) + + expected_partial_lines = [ + "test_run", + "MONITOR PID: 1234", + cols, + "--", + "gaps_test_run_j0 successful 0:03:38 local", + "gaps_test_run_j1 failed 0:01:05 eagle high", + "gaps_test_run_j2 failed local unspecified", + "gaps_test_run_j3 failed local", + "collect-run not submitted", + ] + + for ind, partial in enumerate(expected_partial_lines): + assert all( + string in lines[-16 + ind] for string in partial.split() + ), partial + + assert "Total Runtime" in lines[-6] + assert "Total project time" in lines[-5] + assert lines[-4] == "Total AUs spent: 6" + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/test_hpc.py b/tests/test_hpc.py index 2209fd38..a88b07c4 100644 --- a/tests/test_hpc.py +++ b/tests/test_hpc.py @@ -167,8 +167,28 @@ def test_check_job(manager, q_str): @pytest.mark.parametrize( - ("manager", "q_str", "kwargs", "expectation"), + ("manager", "q_str", "kwargs", "expectation", "add_qos"), [ + ( + PBS, + Q_STAT, + {"queue": "batch-h", "sh_script": "echo Hello!"}, + { + "#PBS -N submit_test": 1, + "#PBS -A rev": 2, + "#PBS -q batch-h": 3, + # cspell:disable-next-line + "#PBS -o ./stdout/submit_test_$PBS_JOBID.o": 4, + # cspell:disable-next-line + "#PBS -e ./stdout/submit_test_$PBS_JOBID.e": 5, + "#PBS -l walltime=00:26:00,qos=high": 6, + # cspell:disable-next-line + "echo Running on: $HOSTNAME, Machine Type: $MACHTYPE": 7, + "echo Running python in directory `which python`": 8, + "echo Hello!": 9, + }, + True, + ), ( PBS, Q_STAT, @@ -187,6 +207,7 @@ def test_check_job(manager, q_str): "echo 
Running python in directory `which python`": 8, "echo Hello!": 9, }, + False, ), ( SLURM, @@ -199,16 +220,17 @@ def test_check_job(manager, q_str): "#SBATCH --nodes=1": 4, "#SBATCH --output=./stdout/submit_test_%j.o": 5, "#SBATCH --error=./stdout/submit_test_%j.e": 6, - "#SBATCH --qos=normal": 7, + "#SBATCH --qos=high": 7, # cspell:disable-next-line "echo Running on: $HOSTNAME, Machine Type: $MACHTYPE": 8, "echo Running python in directory `which python`": 9, "echo Hello!": 10, }, + True, ), ], ) -def test_hpc_submit(manager, q_str, kwargs, expectation, monkeypatch): +def test_hpc_submit(manager, q_str, kwargs, expectation, add_qos, monkeypatch): """Test the HPC job submission utility""" queue_dict = manager.parse_queue_str(q_str) @@ -216,6 +238,8 @@ def test_hpc_submit(manager, q_str, kwargs, expectation, monkeypatch): kwargs["cmd"] = "python -c \"print('hello world')\"" kwargs["allocation"] = "rev" kwargs["walltime"] = 0.43 + if add_qos: + kwargs["qos"] = "high" out, err = hpc_manager.submit("job1", **kwargs) assert out is None assert err == "already_running" diff --git a/tests/test_legacy.py b/tests/test_legacy.py index fc2eb51a..1e1a0be4 100644 --- a/tests/test_legacy.py +++ b/tests/test_legacy.py @@ -17,6 +17,7 @@ StatusField, StatusOption, ) +import gaps.status from gaps.legacy import ( HardwareStatusRetriever, Status, @@ -143,7 +144,7 @@ def test_make_job_file_and_retrieve_job_status(temp_job_dir, job_name): assert expected_fn not in [f.name for f in tmp_path.glob("*")] assert status_fp in tmp_path.glob("*") - assert status == "R", "Failed, status is {status!r}" + assert status == "R", f"Failed, status is {status!r}" status = Status.retrieve_job_status( status_dir=tmp_path, module="generation", job_name="test1.h5" @@ -175,6 +176,37 @@ def test_make_job_file_and_retrieve_job_status(temp_job_dir, job_name): ) +def test_update_job_status(tmp_path, monkeypatch): + """Test updating job status""" + status = Status(tmp_path) + status.data = { + "run": { + StatusField.PIPELINE_INDEX: 0, + "test0": {StatusField.JOB_STATUS: "R"}, + "test1": {StatusField.JOB_STATUS: StatusOption.SUCCESSFUL}, + } + } + + monkeypatch.setattr( + gaps.status.HardwareStatusRetriever, + "__getitem__", + lambda *__, **___: "successful", + raising=True, + ) + + status.update_job_status("run", "test0") + assert ( + status.data["run"]["test0"][StatusField.JOB_STATUS] + == StatusOption.SUCCESSFUL + ) + + status.update_job_status("run", "test1") + assert ( + status.data["run"]["test1"][StatusField.JOB_STATUS] + == StatusOption.SUCCESSFUL + ) + + def test_add_job(temp_job_dir): """Test job addition and exist check""" tmp_path, status_fp = temp_job_dir diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 1e24112f..02ab0bdc 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -336,7 +336,7 @@ def test_pipeline_get_command_return_code( def test_pipeline_status(sample_pipeline_config, monkeypatch): """Test the `_status` function.""" pipeline = Pipeline(sample_pipeline_config) - assert pipeline._status(0) == StatusOption.RUNNING + assert pipeline._status(0) == StatusOption.NOT_SUBMITTED monkeypatch.setattr( HardwareStatusRetriever, "__getitem__", diff --git a/tests/test_status.py b/tests/test_status.py index 244f3309..06e89029 100644 --- a/tests/test_status.py +++ b/tests/test_status.py @@ -33,7 +33,7 @@ TEST_1_ATTRS_1 = { "job_name": "test1", - StatusField.JOB_STATUS: "R", + StatusField.JOB_STATUS: StatusOption.RUNNING, "run_id": 1234, } TEST_1_ATTRS_2 = { @@ -42,11 +42,11 @@ } TEST_2_ATTRS_1 = { 
"job_name": "test2", - StatusField.JOB_STATUS: "R", + StatusField.JOB_STATUS: StatusOption.RUNNING, } TEST_2_ATTRS_2 = { "job_name": "test2", - StatusField.JOB_STATUS: "R", + StatusField.JOB_STATUS: StatusOption.RUNNING, StatusField.JOB_ID: 123, } @@ -257,7 +257,7 @@ def test_make_file(temp_job_dir, job_name): assert expected_fn not in [f.name for f in tmp_path.glob("*")] assert status_fp in tmp_path.glob("*") - assert status == "R", "Failed, status is {status!r}" + assert status == StatusOption.RUNNING, f"Failed, status is {status!r}" status = Status.retrieve_job_status(tmp_path, "generation", "test1.h5") assert expected_fn not in [f.name for f in tmp_path.glob("*")] @@ -299,6 +299,9 @@ def test_update_job_status(tmp_path, monkeypatch): status.update_job_status("run", job_name) assert status.data["run"].get(job_name) == {} + tmp_path = tmp_path / "test" + tmp_path.mkdir() + Status.make_single_job_file( tmp_path, "generation", "test1", TEST_1_ATTRS_1 ) @@ -331,33 +334,66 @@ def test_update_job_status(tmp_path, monkeypatch): ) new_attrs = { "job_name": "test1", - StatusField.JOB_STATUS: "some status", + StatusField.JOB_STATUS: StatusOption.SUBMITTED, StatusField.HARDWARE: "local", } Status.make_single_job_file(tmp_path, "generation", "test1", new_attrs) status.update_job_status("generation", "test1") assert ( status.data["generation"]["test1"][StatusField.JOB_STATUS] - == "some status" + == StatusOption.SUBMITTED ) monkeypatch.setattr( HardwareStatusRetriever, "__getitem__", - lambda *__, **___: StatusOption.RUNNING, + lambda *__, **___: StatusOption.SUBMITTED, raising=True, ) status.update_job_status("generation", "test1") assert ( status.data["generation"]["test1"][StatusField.JOB_STATUS] - == StatusOption.RUNNING + == StatusOption.SUBMITTED ) # test a repeated call to hardware status.update_job_status("generation", "test1") assert ( status.data["generation"]["test1"][StatusField.JOB_STATUS] - == StatusOption.RUNNING + == StatusOption.SUBMITTED + ) + + +def test_update_job_status_with_hardware(tmp_path, monkeypatch): + """Test status update with call to hardware""" + monkeypatch.setattr( + HardwareStatusRetriever, + "__getitem__", + lambda *__, **___: None, + raising=True, + ) + + Status.make_single_job_file( + tmp_path, + "generation", + "test1", + {StatusField.JOB_STATUS: StatusOption.SUBMITTED}, + ) + Status.make_single_job_file( + tmp_path, + "generation", + "test2", + {StatusField.JOB_STATUS: StatusOption.COMPLETE}, + ) + + status = Status(tmp_path).update_from_all_job_files(check_hardware=True) + assert ( + status["generation"]["test1"][StatusField.JOB_STATUS] + == StatusOption.FAILED + ) + assert ( + status["generation"]["test2"][StatusField.JOB_STATUS] + == StatusOption.COMPLETE ) From 902f202852a714034c83df959dfb7c13af3510d4 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 5 Aug 2023 15:23:46 -0600 Subject: [PATCH 32/61] Updated message text --- gaps/cli/status.py | 2 +- tests/cli/test_cli_status.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gaps/cli/status.py b/gaps/cli/status.py index 1f7f8eb0..9e4aaf97 100644 --- a/gaps/cli/status.py +++ b/gaps/cli/status.py @@ -147,7 +147,7 @@ def color_string(string): print(f"{Style.BRIGHT}{runtime_str}{Style.RESET_ALL}") if walltime > 2: walltime_str = ( - f"Total project time (including queue): " + f"Total project wall time (including queue and downtime): " f"{_elapsed_time_as_str(walltime)}" ) print(f"{Style.BRIGHT}{walltime_str}{Style.RESET_ALL}") diff --git a/tests/cli/test_cli_status.py 
b/tests/cli/test_cli_status.py index 5ba85519..d2864df2 100644 --- a/tests/cli/test_cli_status.py +++ b/tests/cli/test_cli_status.py @@ -89,7 +89,7 @@ def test_status(test_data_dir, cli_runner, extra_args, monkeypatch): ), partial assert "Total Runtime" in lines[-6] - assert "Total project time" in lines[-5] + assert "Total project wall time" in lines[-5] assert lines[-4] == "Total AUs spent: 6" @@ -166,7 +166,7 @@ def test_status_with_hardware_check( ), partial assert "Total Runtime" in lines[-6] - assert "Total project time" in lines[-5] + assert "Total project wall time" in lines[-5] assert lines[-4] == "Total AUs spent: 6" From 80d23e278fe7bb44448f4be7abf50fd2fc7cdab5 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 5 Aug 2023 15:30:53 -0600 Subject: [PATCH 33/61] Runtime not added to status monitor if job fails unexpectedly --- gaps/status.py | 9 +++++---- tests/cli/test_cli_status.py | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/gaps/status.py b/gaps/status.py index bc08febe..2278189b 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -750,10 +750,11 @@ def _elapsed_time_as_str(seconds_elapsed): def _add_elapsed_time(status_df): """Add elapsed time to status DataFrame""" - mask = ( - ~status_df[StatusField.TIME_START].isna() - & status_df[StatusField.TIME_END].isna() - ) + has_start_time = ~status_df[StatusField.TIME_START].isna() + has_no_end_time = status_df[StatusField.TIME_END].isna() + has_not_failed = status_df[StatusField.JOB_STATUS] != StatusOption.FAILED + mask = has_start_time & (has_no_end_time & has_not_failed) + start_times = status_df.loc[mask, StatusField.TIME_START] start_times = pd.to_datetime(start_times, format=DT_FMT) elapsed_times = dt.datetime.now() - start_times diff --git a/tests/cli/test_cli_status.py b/tests/cli/test_cli_status.py index d2864df2..97fc200f 100644 --- a/tests/cli/test_cli_status.py +++ b/tests/cli/test_cli_status.py @@ -164,6 +164,8 @@ def test_status_with_hardware_check( assert all( string in lines[-16 + ind] for string in partial.split() ), partial + if "failed" in lines[-16 + ind]: + assert not "(r)" in lines[-16 + ind] assert "Total Runtime" in lines[-6] assert "Total project wall time" in lines[-5] From 81ea8c5094903c8b0b22f6623fc4f5a5e5121fb2 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 12:04:36 -0600 Subject: [PATCH 34/61] Failed jobs default to 0 runtime --- gaps/cli/status.py | 25 +++-- tests/cli/test_cli_status.py | 103 ++++++++++++++++++ .../jobstatus_gaps_test_failed_run_node0.json | 11 ++ .../jobstatus_gaps_test_failed_run_node1.json | 10 ++ .../jobstatus_gaps_test_failed_run_node2.json | 10 ++ .../test_failed_run_status.json | 26 +++++ 6 files changed, 176 insertions(+), 9 deletions(-) create mode 100644 tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node0.json create mode 100644 tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node1.json create mode 100644 tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node2.json create mode 100644 tests/data/test_failed_run/test_failed_run_status.json diff --git a/gaps/cli/status.py b/gaps/cli/status.py index 9e4aaf97..83f3d896 100644 --- a/gaps/cli/status.py +++ b/gaps/cli/status.py @@ -211,16 +211,23 @@ def main_monitor(folder, commands, status, include): df.total_runtime_seconds = run_times_seconds.sum() df.total_aus_used = aus_used.sum() - start_time = df[StatusField.TIME_SUBMITTED].fillna( - dt.datetime.now().strftime(DT_FMT) - ) - start_time = pd.to_datetime(start_time, format=DT_FMT).min() + all_jobs_failed = ( + 
df[StatusField.JOB_STATUS] == StatusOption.FAILED + ).all() + all_end_times_missing = df[StatusField.TIME_END].isna().all() + if all_jobs_failed and all_end_times_missing: + df.walltime = 0 + else: + start_time = df[StatusField.TIME_SUBMITTED].fillna( + dt.datetime.now().strftime(DT_FMT) + ) + start_time = pd.to_datetime(start_time, format=DT_FMT).min() - end_time = df[StatusField.TIME_END].fillna( - dt.datetime.now().strftime(DT_FMT) - ) - end_time = pd.to_datetime(end_time, format=DT_FMT).max() - df.walltime = (end_time - start_time).total_seconds() + end_time = df[StatusField.TIME_END].fillna( + dt.datetime.now().strftime(DT_FMT) + ) + end_time = pd.to_datetime(end_time, format=DT_FMT).max() + df.walltime = (end_time - start_time).total_seconds() _color_print(df, directory.name, commands, status) diff --git a/tests/cli/test_cli_status.py b/tests/cli/test_cli_status.py index 97fc200f..e0eb81d9 100644 --- a/tests/cli/test_cli_status.py +++ b/tests/cli/test_cli_status.py @@ -2,6 +2,8 @@ """ GAPs status command tests. """ +import json +import shutil from pathlib import Path import psutil @@ -172,5 +174,106 @@ def test_status_with_hardware_check( assert lines[-4] == "Total AUs spent: 6" +@pytest.mark.parametrize( + "extra_args", + [ + [], + "-i out_dir".split(), + "-c run".split() + "-c collect-run".split(), + "-s successful".split() + + "-s fail".split() + + "-s r".split() + + "-s pending".split() + + "-s u".split() + + "-s dne".split(), + "-c run".split() + + "-c collect-run".split() + + "-s successful".split() + + "-s fail".split() + + "-s r".split() + + "-s pending".split() + + "-s u".split() + + "-s dne".split(), + ], +) +@pytest.mark.parametrize("single_command", [True, False]) +def test_failed_run( + tmp_path, + test_data_dir, + cli_runner, + extra_args, + monkeypatch, + single_command, +): + """Test the status command.""" + + monkeypatch.setattr(psutil, "pid_exists", lambda *__: True, raising=True) + + status = status_command() + if single_command: + shutil.copytree( + test_data_dir / "test_failed_run", tmp_path / "test_failed_run" + ) + run_dir = (tmp_path / "test_failed_run").as_posix() + pipe_json = Path(run_dir) / "test_failed_run_status.json" + with open(pipe_json, "r") as config_file: + config = json.load(config_file) + config.pop("collect-run") + with open(pipe_json, "w") as config_file: + json.dump(config, config_file) + else: + run_dir = (test_data_dir / "test_failed_run").as_posix() + + if "dne" in extra_args: + with pytest.warns(gapsWarning): + result = cli_runner.invoke(status, [run_dir] + extra_args) + else: + result = cli_runner.invoke(status, [run_dir] + extra_args) + + lines = result.stdout.split("\n") + cols = [ + "job_status", + "time_submitted", + "time_start", + "time_end", + "total_runtime", + "hardware", + "qos", + ] + if "out_dir" in extra_args: + cols += ["out_dir"] + cols = " ".join(cols) + + expected_partial_lines = [ + "test_failed_run", + "MONITOR PID: 1234", + cols, + "--", + "gaps_test_failed_run_j0 failed local", + "gaps_test_failed_run_j1 failed local high", + "gaps_test_failed_run_j2 failed local unspecified", + ] + if single_command: + start_ind = -12 + else: + expected_partial_lines += ["collect-run not submitted"] + start_ind = -14 + + for ind, partial in enumerate(expected_partial_lines): + assert all( + string in lines[start_ind + ind] for string in partial.split() + ), partial + assert not "(r)" in lines[start_ind + ind] + assert "Total AUs spent" not in lines[start_ind + ind] + if single_command: + assert "Total project wall time" not in 
lines[start_ind + 10] + + if single_command: + assert "Total Runtime: 0:00:00" in lines[-4] + else: + assert "Total Runtime: 0:00:00" in lines[-5] + assert "Total project wall time" in lines[-4] + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node0.json b/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node0.json new file mode 100644 index 00000000..b0e9ff67 --- /dev/null +++ b/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node0.json @@ -0,0 +1,11 @@ +{ + "run": { + "gaps_test_failed_run_j0": { + "job_status": "submitted", + "out_dir": "/a/test/path", + "out_file": null, + "time_start": "07-Dec-2022 14:21:38", + "hardware": "local" + } + } +} \ No newline at end of file diff --git a/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node1.json b/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node1.json new file mode 100644 index 00000000..a3f98b33 --- /dev/null +++ b/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node1.json @@ -0,0 +1,10 @@ +{ + "run": { + "gaps_test_failed_run_j1": { + "job_status": "failed", + "out_dir": "/a/test/path", + "time_start": "07-Dec-2022 14:25:21", + "qos": "high" + } + } +} \ No newline at end of file diff --git a/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node2.json b/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node2.json new file mode 100644 index 00000000..049a03d5 --- /dev/null +++ b/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node2.json @@ -0,0 +1,10 @@ +{ + "run": { + "gaps_test_failed_run_j2": { + "job_status": "running", + "out_dir": "/a/test/path", + "time_start": "07-Dec-2022 14:26:17", + "qos": "unspecified" + } + } +} \ No newline at end of file diff --git a/tests/data/test_failed_run/test_failed_run_status.json b/tests/data/test_failed_run/test_failed_run_status.json new file mode 100644 index 00000000..5af608d0 --- /dev/null +++ b/tests/data/test_failed_run/test_failed_run_status.json @@ -0,0 +1,26 @@ +{ + "another_input": ["hello world"], + "a_dict_input": {"a_value": 42}, + "monitor_pid": 1234, + "run": { + "pipeline_index": 0, + "gaps_test_failed_run_j0": { + "job_status": "submitted", + "hardware": "local", + "time_submitted": "07-Dec-2022 14:21:33" + }, + "gaps_test_failed_run_j1": { + "job_status": "submitted", + "hardware": "local", + "time_submitted": "07-Dec-2022 14:25:17" + }, + "gaps_test_failed_run_j2": { + "job_status": "submitted", + "hardware": "local", + "time_submitted": "07-Dec-2022 14:25:17" + } + }, + "collect-run": { + "pipeline_index": 1 + } +} \ No newline at end of file From 672ef5f832c05142326876b1a5a86ff17d941854 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 12:11:31 -0600 Subject: [PATCH 35/61] Updated log versions logic --- gaps/collection.py | 4 ++-- gaps/log.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gaps/collection.py b/gaps/collection.py index 813ba132..086a651f 100644 --- a/gaps/collection.py +++ b/gaps/collection.py @@ -14,7 +14,7 @@ import pandas as pd from rex import Resource, Outputs -from rex.utilities import log_versions +from gaps.log import log_versions from gaps.warnings import gapsCollectionWarning from gaps.exceptions import gapsRuntimeError from gaps.utilities import project_points_from_container_or_slice @@ -425,7 +425,7 @@ def __init__( Flag to purge output HDF5 file if it already exists. By default, `False`. 
""" - log_versions(logger) + log_versions() self.h5_out = Path(h5_file) self.collect_pattern = collect_pattern if clobber and self.h5_out.exists(): diff --git a/gaps/log.py b/gaps/log.py index ded22726..7c1b92bf 100644 --- a/gaps/log.py +++ b/gaps/log.py @@ -20,8 +20,8 @@ def log_versions(): """Log package versions.""" - gaps.logger.debug("Running with gaps version %s", gaps.__version__) - gaps.logger.debug(" - rex version %s", rex.__version__) + gaps.logger.info("Running with gaps version %s", gaps.__version__) + rex.utilities.log_versions(gaps.logger) def log_mem(log_level="DEBUG"): From 1f8af50ea2fcf69cbf8f215e1aec77726e13e59a Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 12:48:25 -0600 Subject: [PATCH 36/61] Added timestamp to monitor output --- gaps/pipeline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index e8c59c9e..2326abde 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -191,11 +191,12 @@ def _get_command_return_code(self, status, command): if return_code != StatusOption.FAILED and check_failed: fail_str = ", but some jobs have failed" logger.info( - "CLI command %r for job %r %s%s.", + "CLI command %r for job %r %s%s. (%s)", command, self.name, return_code.with_verb, # pylint: disable=no-member fail_str, + time.ctime(), ) return return_code From a28cb9057421511cb853de9ab177069a9966d0bb Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 12:57:17 -0600 Subject: [PATCH 37/61] Fix error message --- gaps/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index 2326abde..347f0c0e 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -180,7 +180,7 @@ def _get_command_return_code(self, status, command): elif job_status is None: arr.append(StatusOption.COMPLETE) else: - msg = "Job status code {job_status!r} not understood!" + msg = f"Job status code {job_status!r} not understood!" raise gapsValueError(msg) _dump_sorted(status) From dd07f7ee9193b261d788c971f5bf90d605ba25e0 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 13:32:15 -0600 Subject: [PATCH 38/61] Add `reset_query_cache` function --- gaps/hpc.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gaps/hpc.py b/gaps/hpc.py index bf0df633..63d636ea 100644 --- a/gaps/hpc.py +++ b/gaps/hpc.py @@ -102,6 +102,10 @@ def queue(self): return self._queue + def reset_query_cache(self): + """Reset the query dict cache so that hardware is queried again.""" + self._queue = None + def check_status_using_job_id(self, job_id): """Check the status of a job using the HPC queue and job ID. 
@@ -154,7 +158,7 @@ def cancel(self, arg): self.cancel(job_id) elif str(arg).lower() == "all": - self._queue = None + self.reset_query_cache() for job_id in self.queue.keys(): self.cancel(job_id) From e8c4161d365e76544714c6a8915d8429b44f1e02 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 14:23:24 -0600 Subject: [PATCH 39/61] Monitor jobs now correctly query the hardware --- gaps/pipeline.py | 5 +++-- gaps/status.py | 8 ++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index 347f0c0e..120350e2 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -8,7 +8,7 @@ from warnings import warn from gaps.hpc import SLURM -from gaps.status import Status, StatusOption, StatusField +from gaps.status import Status, StatusOption, StatusField, HardwareOption from gaps.utilities import recursively_update_dict from gaps.config import load_config, init_logging_from_config from gaps.exceptions import ( @@ -114,6 +114,7 @@ def _monitor(self, step, seconds=5, step_status=StatusOption.RUNNING): while step_status.is_processing: time.sleep(seconds) + HardwareOption.reset_all_cached_queries() step_status = self._status(step) if step_status == StatusOption.FAILED: @@ -156,7 +157,7 @@ def _get_command_return_code(self, status, command): # initialize return code array arr = [] check_failed = False - status.update_from_all_job_files() + status.update_from_all_job_files(check_hardware=True) if command not in status.data: # assume running diff --git a/gaps/status.py b/gaps/status.py index 2278189b..79b6fccb 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -80,6 +80,14 @@ def _new_post_hook(cls, obj, value): obj.charge_factor = 0 return obj + # pylint: disable=no-member + @classmethod + def reset_all_cached_queries(cls): + """Reset all cached hardware queries.""" + cls.EAGLE.manager.reset_query_cache() + cls.KESTREL.manager.reset_query_cache() + cls.PEREGRINE.manager.reset_query_cache() + class StatusOption(CaseInsensitiveEnum): """A collection of job status options.""" From a55d0fd7ce25ea92c4af66004d29047af6a937c1 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 15:27:50 -0600 Subject: [PATCH 40/61] Updated status option check to be more robust --- gaps/status.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/gaps/status.py b/gaps/status.py index 79b6fccb..db8350cd 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -344,10 +344,14 @@ def _job_statuses(self): def _update_job_status_from_hardware(job_data, hardware_status_retriever): """Update job status to failed if it is processing but DNE on HPC.""" - if not StatusOption( - job_data.get(StatusField.JOB_STATUS, StatusOption.NOT_SUBMITTED) - ).is_processing: - return + status = job_data.get( + StatusField.JOB_STATUS, StatusOption.NOT_SUBMITTED + ) + try: + if not StatusOption(status).is_processing: + return + except ValueError: + pass job_id = job_data.get(StatusField.JOB_ID, None) job_hardware = job_data.get(StatusField.HARDWARE, None) From 2198571220f09305db5c4b44def871aa651cf559 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 15:28:26 -0600 Subject: [PATCH 41/61] Do not submit pipeline if another monitor pipeline is already running --- gaps/cli/pipeline.py | 20 +++++++++++++++++++- tests/cli/test_cli_pipeline.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/gaps/cli/pipeline.py b/gaps/cli/pipeline.py index ef1dfae7..26b4857a 100644 --- a/gaps/cli/pipeline.py +++ b/gaps/cli/pipeline.py 
@@ -6,15 +6,18 @@ import sys import logging from pathlib import Path +from warnings import warn import click +import psutil from gaps import Pipeline from gaps.config import ConfigType from gaps.cli.documentation import _pipeline_command_help from gaps.cli.command import _WrappedCommand -from gaps.status import Status +from gaps.status import Status, StatusField from gaps.exceptions import gapsExecutionError +from gaps.warnings import gapsWarning logger = logging.getLogger(__name__) @@ -62,6 +65,7 @@ def pipeline(ctx, config_file, cancel, monitor, background=False): if cancel: Pipeline.cancel_all(config_file) return + if background: if not _can_run_background(): msg = ( @@ -72,6 +76,20 @@ def pipeline(ctx, config_file, cancel, monitor, background=False): ctx.obj["LOG_STREAM"] = False _kickoff_background(config_file) + project_dir = str(Path(config_file).parent.expanduser().resolve()) + status = Status(project_dir).update_from_all_job_files(purge=False) + monitor_pid = status.get(StatusField.MONITOR_PID) + if monitor_pid is not None and psutil.pid_exists(monitor_pid): + msg = ( + f"Another pipeline in {project_dir!r} is running on monitor PID: " + f"{monitor_pid}. Not starting a new pipeline execution." + ) + warn(msg, gapsWarning) + return + + if monitor: + Status.record_monitor_pid(Path(config_file).parent, os.getpid()) + Pipeline.run(config_file, monitor=monitor) diff --git a/tests/cli/test_cli_pipeline.py b/tests/cli/test_cli_pipeline.py index 027142c3..69b786cb 100644 --- a/tests/cli/test_cli_pipeline.py +++ b/tests/cli/test_cli_pipeline.py @@ -21,6 +21,7 @@ _can_run_background, ) from gaps.exceptions import gapsExecutionError +from gaps.warnings import gapsWarning TEST_FILE_DIR = Path(__file__).parent.as_posix() @@ -103,9 +104,12 @@ def test_pipeline_command( if not extra_args: cli_runner.invoke(pipe, ["-c", pipe_config_fp.as_posix()]) + else: + assert Status(tmp_path).get(StatusField.MONITOR_PID) == os.getpid() with open(target_config_fp, "r") as config: assert json.load(config) == SUCCESS_CONFIG + assert_message_was_logged("Pipeline job", "INFO") assert_message_was_logged("is complete.", "INFO") @@ -219,5 +223,30 @@ def _test_func(): assert config == expected_config +def test_pipeline_command_with_running_pid(tmp_path, cli_runner, monkeypatch): + """Assert pipeline_command does not start processing if existing pid.""" + + monkeypatch.setattr( + gaps.cli.pipeline.Status, + "get", + lambda *__, **___: os.getpid(), + raising=True, + ) + + pipe_config_fp = tmp_path / "config_pipe.json" + pipe_config_fp.touch() + + pipe = pipeline_command({}) + with pytest.warns(gapsWarning) as warn_info: + cli_runner.invoke(pipe, ["-c", pipe_config_fp.as_posix()], obj={}) + + assert "Another pipeline" in warn_info[0].message.args[0] + assert "is running on monitor PID:" in warn_info[0].message.args[0] + assert f"{os.getpid()}" in warn_info[0].message.args[0] + assert ( + "Not starting a new pipeline execution" in warn_info[0].message.args[0] + ) + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) From fe04d392cbe1f5e6b34f3547d3db9066f15a4bfd Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 15:59:22 -0600 Subject: [PATCH 42/61] Monitor jobs now cancelled if exdception detected --- gaps/pipeline.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index 120350e2..9717a1d2 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -103,7 +103,11 @@ def _main(self): break time.sleep(1) - 
self._monitor(step) + try: + self._monitor(step) + except Exception as error: + self._cancel_all_jobs() + raise error else: logger.info("Pipeline job %r is complete.", self.name) From 984d61ab9391f42ceaad776fb7ffa59607991196 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 16:00:01 -0600 Subject: [PATCH 43/61] Cancelling jobs is now hardware-aware --- gaps/pipeline.py | 9 +++++---- gaps/status.py | 5 +++++ tests/test_pipeline.py | 18 ++++++++++++++---- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index 9717a1d2..effd1c2b 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -7,7 +7,6 @@ from pathlib import Path from warnings import warn -from gaps.hpc import SLURM from gaps.status import Status, StatusOption, StatusField, HardwareOption from gaps.utilities import recursively_update_dict from gaps.config import load_config, init_logging_from_config @@ -80,9 +79,11 @@ def name(self): def _cancel_all_jobs(self): """Cancel all jobs in this pipeline.""" - slurm_manager = SLURM() - for job_id in self.status.job_ids: - slurm_manager.cancel(job_id) + status = self.status + for job_id, hardware in zip(status.job_ids, status.job_hardware): + manager = HardwareOption(hardware).manager + if manager is not None: + manager.cancel(job_id) logger.info("Pipeline job %r cancelled.", self.name) def _main(self): diff --git a/gaps/status.py b/gaps/status.py index db8350cd..499627e4 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -201,6 +201,11 @@ def job_ids(self): """list: Flat list of job ids.""" return _get_attr_flat_list(self.data, key=StatusField.JOB_ID) + @property + def job_hardware(self): + """list: Flat list of job hardware options.""" + return _get_attr_flat_list(self.data, key=StatusField.HARDWARE) + def as_df(self, commands=None, index_name="job_name", include_cols=None): """Format status as pandas DataFrame. 
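The new ``job_hardware`` property and the updated ``_cancel_all_jobs`` above pair every job ID with the hardware option it was submitted under, so each job is cancelled through the manager for its own scheduler (local jobs have no manager and are skipped). A rough sketch of the same pattern used outside the ``Pipeline`` class, assuming a project directory that contains a gaps status file (the path is illustrative):

    from gaps.status import Status, HardwareOption

    status = Status("./my_project_dir")  # illustrative project directory

    # cancel each job through the manager that matches its hardware;
    # ``manager`` is ``None`` for local jobs, which are skipped here
    for job_id, hardware in zip(status.job_ids, status.job_hardware):
        manager = HardwareOption(hardware).manager
        if manager is not None:
            manager.cancel(job_id)
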
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 02ab0bdc..ba7c94e6 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -18,6 +18,7 @@ Status, StatusField, StatusOption, + HardwareOption, HardwareStatusRetriever, ) from gaps.pipeline import ( @@ -521,22 +522,31 @@ def cache_cancel_calls(__, job_id): sample_pipeline_config.parent, "run", "test1", - job_attrs={StatusField.JOB_ID: 0}, + job_attrs={ + StatusField.JOB_ID: 0, + StatusField.HARDWARE: HardwareOption.LOCAL, + }, ) Status.mark_job_as_submitted( sample_pipeline_config.parent, "run", "test2", - job_attrs={StatusField.JOB_ID: 1}, + job_attrs={ + StatusField.JOB_ID: 1, + StatusField.HARDWARE: HardwareOption.EAGLE, + }, ) Status.mark_job_as_submitted( sample_pipeline_config.parent, "run", "test3", - job_attrs={StatusField.JOB_ID: 12}, + job_attrs={ + StatusField.JOB_ID: 12, + StatusField.HARDWARE: HardwareOption.EAGLE, + }, ) Pipeline.cancel_all(sample_pipeline_config) - assert set(cancelled_jobs) == {0, 1, 12} + assert set(cancelled_jobs) == {1, 12} assert_message_was_logged("Pipeline job", "INFO") assert_message_was_logged("cancelled", "INFO") From b27c3e505cc76f5606f2fd4a38131f6dc2685e3d Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 17:03:17 -0600 Subject: [PATCH 44/61] Update logger initialization --- gaps/batch.py | 11 +-------- gaps/cli/batch.py | 3 +++ gaps/cli/pipeline.py | 4 +++- gaps/config.py | 20 +++++++++++----- gaps/pipeline.py | 4 +--- tests/cli/test_cli_pipeline.py | 43 +++++++++++++++++----------------- 6 files changed, 44 insertions(+), 41 deletions(-) diff --git a/gaps/batch.py b/gaps/batch.py index a41a2c9c..7b9f67e7 100644 --- a/gaps/batch.py +++ b/gaps/batch.py @@ -17,12 +17,7 @@ from rex.utilities import parse_year -from gaps.config import ( - load_config, - init_logging_from_config, - ConfigType, - resolve_all_paths, -) +from gaps.config import load_config, ConfigType, resolve_all_paths import gaps.cli.pipeline from gaps.pipeline import Pipeline from gaps.exceptions import gapsValueError, gapsConfigError @@ -56,8 +51,6 @@ def __init__(self, config): self._job_tags = None self._base_dir, config = _load_batch_config(config) self._pipeline_fp = Path(config["pipeline_config"]) - - init_logging_from_config(config) self._sets = _parse_config(config) logger.info("Batch job initialized with %d sub jobs.", len(self._sets)) @@ -97,14 +90,12 @@ def _make_job_dirs(self): # walk through current directory getting everything to copy for source_dir, _, filenames in os.walk(self._base_dir): - # don't make additional copies of job sub directories. if any(job_tag in source_dir for job_tag in self._sets): continue # For each dir level, iterate through the batch arg combos for tag, (arg_comb, mod_files, __) in self._sets.items(): - # Add the job tag to the directory path. 
# This will copy config subdirs into the job subdirs source_dir = Path(source_dir) diff --git a/gaps/cli/batch.py b/gaps/cli/batch.py index b287dbaf..beb2d7ff 100644 --- a/gaps/cli/batch.py +++ b/gaps/cli/batch.py @@ -5,12 +5,15 @@ import click import gaps.batch +from gaps.config import init_logging_from_config_file from gaps.cli.command import _WrappedCommand from gaps.cli.documentation import _batch_command_help def _batch(config_file, dry, cancel, delete, monitor_background): """Execute an analysis pipeline over a parametric set of inputs""" + init_logging_from_config_file(config_file, background=monitor_background) + if cancel: gaps.batch.BatchJob(config_file).cancel() elif delete: diff --git a/gaps/cli/pipeline.py b/gaps/cli/pipeline.py index 26b4857a..a1c16e2f 100644 --- a/gaps/cli/pipeline.py +++ b/gaps/cli/pipeline.py @@ -12,7 +12,7 @@ import psutil from gaps import Pipeline -from gaps.config import ConfigType +from gaps.config import ConfigType, init_logging_from_config_file from gaps.cli.documentation import _pipeline_command_help from gaps.cli.command import _WrappedCommand from gaps.status import Status, StatusField @@ -62,6 +62,8 @@ def pipeline(ctx, config_file, cancel, monitor, background=False): config_file = config_file[0] + init_logging_from_config_file(config_file, background=background) + if cancel: Pipeline.cancel_all(config_file) return diff --git a/gaps/config.py b/gaps/config.py index f0a5b06a..7fe73598 100644 --- a/gaps/config.py +++ b/gaps/config.py @@ -255,26 +255,34 @@ def resolve_all_paths(container, base_dir): return container -def init_logging_from_config(config): +def init_logging_from_config_file(config_file, background=False): """Init logging, taking care of legacy rex-style kwargs. Parameters ---------- - config : dict - Dictionary which may or may not contain a "logging" key. If key - not found, no further action is taken. If key is found, the - value is expected to be a dictionary of keyword-argument pairs + config_file : path-like + Path to a config file parsable as a dictionary which may or may + not contain a "logging" key. If key not found, no further action + is taken. If key is found, the value is expected to be a + dictionary of keyword-argument pairs to :func:`gaps.log.init_logger`. rex-style keys ("log_level", "log_file", "log_format") are allowed. The values of these inputs are used to initialize a gaps logger. + background : bool, optional + Optional flag to specify background job initialization. If + ``True``, then stream output is disabled. By default, ``False``. 
""" - + config = load_config(config_file) if "logging" not in config: return + kwargs = config["logging"] kwarg_map = {"log_level": "level", "log_file": "file", "log_format": "fmt"} for legacy_kwarg, new_kwarg in kwarg_map.items(): if legacy_kwarg in kwargs: kwargs[new_kwarg] = kwargs.pop(legacy_kwarg) + if background: + kwargs["stream"] = False + init_logger(**kwargs) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index effd1c2b..7d3bae95 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -9,7 +9,7 @@ from gaps.status import Status, StatusOption, StatusField, HardwareOption from gaps.utilities import recursively_update_dict -from gaps.config import load_config, init_logging_from_config +from gaps.config import load_config from gaps.exceptions import ( gapsConfigError, gapsExecutionError, @@ -51,8 +51,6 @@ def __init__(self, pipeline, monitor=True): self._run_list = _check_pipeline(config) self._init_status() - init_logging_from_config(config) - def _init_status(self): """Initialize the status json in the output directory.""" status = self.status diff --git a/tests/cli/test_cli_pipeline.py b/tests/cli/test_cli_pipeline.py index 69b786cb..b6a2d71b 100644 --- a/tests/cli/test_cli_pipeline.py +++ b/tests/cli/test_cli_pipeline.py @@ -35,15 +35,28 @@ @pytest.fixture -def runnable_pipeline(): +def runnable_pipeline(tmp_path): """Add run to pipeline commands for test only.""" try: Pipeline.COMMANDS["run"] = run + pipe_config_fp = tmp_path / "config_pipe.json" + with open(pipe_config_fp, "w") as config_file: + json.dump(SAMPLE_CONFIG, config_file) yield finally: Pipeline.COMMANDS.pop("run") +@pytest.fixture +def pipe_config_fp(tmp_path): + """Add a sample pipeline config to a temp directory.""" + pipe_config_fp = tmp_path / "config_pipe.json" + with open(pipe_config_fp, "w") as config_file: + json.dump(SAMPLE_CONFIG, config_file) + + yield pipe_config_fp + + @click.command() @click.option("--config", "-c", default=".", help="Path to config file") def run(config): @@ -57,7 +70,7 @@ def run(config): # pylint: disable=no-value-for-parameter -def test_can_run_background(monkeypatch, test_ctx, tmp_path): +def test_can_run_background(monkeypatch, test_ctx, pipe_config_fp): """Test the `_can_run_background` method""" monkeypatch.setattr(os, "setsid", lambda: None, raising=False) @@ -70,10 +83,8 @@ def test_can_run_background(monkeypatch, test_ctx, tmp_path): assert not _can_run_background() - pipe_config_fp = tmp_path / "config_pipe.json" - pipe_config_fp.touch() with pytest.raises(gapsExecutionError) as exc_info: - pipeline(tmp_path, cancel=False, monitor=False, background=True) + pipeline(pipe_config_fp, cancel=False, monitor=False, background=True) assert "Cannot run pipeline in background on system" in str(exc_info) @@ -84,6 +95,7 @@ def test_pipeline_command( tmp_path, cli_runner, runnable_pipeline, + pipe_config_fp, assert_message_was_logged, ): """Test the pipeline_command creation.""" @@ -91,10 +103,6 @@ def test_pipeline_command( target_config_fp = tmp_path / "config_run.json" target_config_fp.touch() - pipe_config_fp = tmp_path / "config_pipe.json" - with open(pipe_config_fp, "w") as config_file: - json.dump(SAMPLE_CONFIG, config_file) - pipe = pipeline_command({}) if _can_run_background(): assert "background" in [opt.name for opt in pipe.params] @@ -118,7 +126,7 @@ def test_pipeline_command( "extra_args", [["--background"], ["--monitor", "--background"]] ) def test_pipeline_command_with_background( - extra_args, tmp_path, cli_runner, monkeypatch + extra_args, pipe_config_fp, cli_runner, 
monkeypatch ): """Test the pipeline_command creation with background.""" @@ -133,9 +141,6 @@ def _new_run(config_file): monkeypatch.setattr(os, "setsid", lambda: None, raising=False) monkeypatch.setattr(os, "fork", lambda: None, raising=False) - pipe_config_fp = tmp_path / "config_pipe.json" - pipe_config_fp.touch() - pipe = pipeline_command({}) assert "background" in [opt.name for opt in pipe.params] assert not kickoff_background_cache @@ -147,12 +152,9 @@ def _new_run(config_file): assert pipe_config_fp.as_posix() in kickoff_background_cache[0] -def test_pipeline_command_cancel(tmp_path, cli_runner, monkeypatch): +def test_pipeline_command_cancel(pipe_config_fp, cli_runner, monkeypatch): """Test the pipeline_command with --cancel.""" - pipe_config_fp = tmp_path / "config_pipe.json" - pipe_config_fp.touch() - def _new_cancel(config): assert config == pipe_config_fp.as_posix() @@ -223,7 +225,9 @@ def _test_func(): assert config == expected_config -def test_pipeline_command_with_running_pid(tmp_path, cli_runner, monkeypatch): +def test_pipeline_command_with_running_pid( + pipe_config_fp, cli_runner, monkeypatch +): """Assert pipeline_command does not start processing if existing pid.""" monkeypatch.setattr( @@ -233,9 +237,6 @@ def test_pipeline_command_with_running_pid(tmp_path, cli_runner, monkeypatch): raising=True, ) - pipe_config_fp = tmp_path / "config_pipe.json" - pipe_config_fp.touch() - pipe = pipeline_command({}) with pytest.warns(gapsWarning) as warn_info: cli_runner.invoke(pipe, ["-c", pipe_config_fp.as_posix()], obj={}) From b10ccbc04e9cbb837a24d5486aa89ca544209c7d Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 22:18:03 -0600 Subject: [PATCH 45/61] Tabs now grouped together in docs --- gaps/cli/documentation.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index d0aed8e8..432310fa 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -2,6 +2,7 @@ """ CLI documentation utilities. """ +import sys from copy import deepcopy from functools import lru_cache from inspect import signature, isclass @@ -107,17 +108,17 @@ .. tabs:: - .. tab:: JSON + .. group-tab:: JSON :: {template_json_config} - .. tab:: YAML + .. group-tab:: YAML :: {template_yaml_config} - .. tab:: TOML + .. group-tab:: TOML :: {template_toml_config} @@ -148,17 +149,17 @@ .. tabs:: - .. tab:: JSON + .. group-tab:: JSON :: {template_json_config} - .. tab:: YAML + .. group-tab:: YAML :: {template_yaml_config} - .. tab:: TOML + .. group-tab:: TOML :: {template_toml_config} @@ -182,17 +183,17 @@ .. tabs:: - .. tab:: JSON + .. group-tab:: JSON :: {sample_json_args_dict} - .. tab:: YAML + .. group-tab:: YAML :: {sample_yaml_args_dict} - .. tab:: TOML + .. group-tab:: TOML :: {sample_toml_args_dict} @@ -219,17 +220,17 @@ .. tabs:: - .. tab:: JSON + .. group-tab:: JSON :: {sample_json_files} - .. tab:: YAML + .. group-tab:: YAML :: {sample_yaml_files} - .. tab:: TOML + .. group-tab:: TOML :: {sample_toml_files} @@ -248,17 +249,17 @@ .. tabs:: - .. tab:: JSON + .. group-tab:: JSON :: {template_json_config} - .. tab:: YAML + .. group-tab:: YAML :: {template_yaml_config} - .. tab:: TOML + .. 
group-tab:: TOML :: {template_toml_config} From c5da3958e7005e370bd637fc03e230068f6d44bc Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 8 Aug 2023 16:12:06 -0600 Subject: [PATCH 46/61] Set max width on terminal to look good with docs --- gaps/cli/cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gaps/cli/cli.py b/gaps/cli/cli.py index 0911ef57..fb30fff7 100644 --- a/gaps/cli/cli.py +++ b/gaps/cli/cli.py @@ -216,3 +216,4 @@ def _main_cb(ctx, verbose): """Set the obj and verbose settings of the commands.""" ctx.ensure_object(dict) ctx.obj["VERBOSE"] = verbose + ctx.max_content_width = 92 From da1c6f346ed8d53954b07bdcf400a187af26a2cf Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 8 Aug 2023 16:12:31 -0600 Subject: [PATCH 47/61] Add `_is_sphinx_build` utility function --- gaps/utilities.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gaps/utilities.py b/gaps/utilities.py index b401596b..9e83f5ae 100644 --- a/gaps/utilities.py +++ b/gaps/utilities.py @@ -2,6 +2,7 @@ """ GAPs utilities. """ +import sys import copy import logging import collections @@ -165,3 +166,8 @@ def resolve_path(path, base_dir): pass return path + + +def _is_sphinx_build(): + """``True`` if sphinx is in system modules, else ``False``""" + return "sphinx" in sys.modules From 10876925bd50c4d739c01066a8de4d3eedc96e15 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 8 Aug 2023 18:24:52 -0600 Subject: [PATCH 48/61] CLI command now says "Config Parameters" --- gaps/cli/command.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/gaps/cli/command.py b/gaps/cli/command.py index 8cdb95f1..b9e56c29 100644 --- a/gaps/cli/command.py +++ b/gaps/cli/command.py @@ -12,6 +12,7 @@ from gaps.cli.config import GAPS_SUPPLIED_ARGS from gaps.cli.documentation import CommandDocumentation from gaps.cli.preprocessing import split_project_points_into_ranges +from gaps.utilities import _is_sphinx_build class AbstractBaseCLICommandConfiguration(ABC): @@ -711,6 +712,12 @@ def wrap_text( ) if "Parameters\n----------" not in wrapped_text.replace(" ", ""): wrapped_text = wrapped_text.replace(".\n", ".\n\n") + elif not _is_sphinx_build(): # pragma: no cover + wrapped_text = wrapped_text.replace( + "Parameters\n----------", + "\nConfig Parameters\n-----------------", + ) + return wrapped_text click.formatting.wrap_text = wrap_text From 8105c8285c67fdd4f952c470732c2602500f85d9 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 10:42:33 -0600 Subject: [PATCH 49/61] Big update to documentation formatting to support pretty docs in both CLI and SPhinx --- gaps/cli/documentation.py | 402 +++++++++++++++------------- tests/cli/test_cli_documentation.py | 30 ++- 2 files changed, 248 insertions(+), 184 deletions(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 432310fa..0cf6974b 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -2,14 +2,15 @@ """ CLI documentation utilities. """ -import sys from copy import deepcopy +from itertools import chain from functools import lru_cache from inspect import signature, isclass from numpydoc.docscrape import NumpyDocString from gaps.config import config_as_str_for_docstring, ConfigType +from gaps.utilities import _is_sphinx_build DEFAULT_EXEC_VALUES = { @@ -27,21 +28,19 @@ } EXTRA_EXEC_PARAMS = { - "max_workers": "Maximum number of parallel workers run on each node.", - "sites_per_worker": "Number of sites to run in series on a worker.", - "memory_utilization_limit": ( - "Memory utilization limit (fractional). 
Must be a value between 0 " - "and 100. This input sets how much data will be stored in-memory " - "at any given time before flushing to disk." - ), - "timeout": ( - "Number of seconds to wait for parallel run iteration to complete " - "before early termination" - ), - "pool_size": ( - "Number of futures to submit to a single process pool for " - "parallel futures." - ), + "max_workers": """Maximum number of parallel workers run on each node.""", + "sites_per_worker": """Number of sites to run in series on a worker.""", + "memory_utilization_limit": """Memory utilization limit (fractional). Must be a value + between 0 and 100. This input sets how much data will + be stored in-memory at any given time before flushing + to disk. + """, + "timeout": """Number of seconds to wait for parallel run iteration + to complete before early termination. + """, + "pool_size": """Number of futures to submit to a single process pool + for parallel futures. + """, } CONFIG_TYPES = [ @@ -102,86 +101,71 @@ """ PIPELINE_CONFIG_DOC = """ -Path to the ``pipeline`` configuration file. This argument can be left out, -but *one and only one file* with "pipeline" in the name should exist in the -directory and contain the config information. Below is a sample template config - -.. tabs:: - - .. group-tab:: JSON - :: - - {template_json_config} - - .. group-tab:: YAML - :: - - {template_yaml_config} - - .. group-tab:: TOML - :: - - {template_toml_config} - +Path to the ``pipeline`` configuration file. This argument can be +left out, but *one and only one file* with "pipeline" in the +name should exist in the directory and contain the config +information. {sample_config} Parameters ---------- pipeline : list of dicts - A list of dictionaries, where each dictionary represents one step - in the pipeline. Each dictionary should have one key-value pair, - where the key is the name of the CLI command to run, and the value - is the path to a config file containing the configuration for that - command. + A list of dictionaries, where each dictionary represents one + step in the pipeline. Each dictionary should have one + key-value pair, where the key is the name of the CLI + command to run, and the value is the path to a config + file containing the configuration for that command. logging : dict, optional Dictionary containing keyword-argument pairs to pass to - `init_logger `_. This initializes - logging for the submission portion of the pipeline. Note, however, - that each step (command) will **also** record the submission step - log output to a common "project" log file, so it's only ever - necessary to use this input if you want a different (lower) level of - verbosity than the `log_level` specified in the config for the step - of the pipeline being executed. + `init_logger `_. This + initializes logging for the submission portion of the + pipeline. Note, however, that each step (command) will + **also** record the submission step log output to a + common "project" log file, so it's only ever necessary + to use this input if you want a different (lower) level + of verbosity than the `log_level` specified in the + config for the step of the pipeline being executed. """ BATCH_CONFIG_DOC = """ -Path to the ``batch`` configuration file. Below is a sample template config - -.. tabs:: - - .. group-tab:: JSON - :: - - {template_json_config} - - .. group-tab:: YAML - :: - - {template_yaml_config} - - .. group-tab:: TOML - :: - - {template_toml_config} - +Path to the ``batch`` configuration file. 
{sample_config} Parameters ---------- pipeline_config : str - Path to the pipeline configuration defining the commands to run for - every parametric set. + Path to the pipeline configuration defining the commands to + run for every parametric set. sets : list of dicts - A list of dictionaries, where each dictionary defines a "set" of - parametric runs. Each dictionary should have the following keys: + A list of dictionaries, where each dictionary defines a + "set" of parametric runs. Each dictionary should have + the following keys: args : dict A dictionary defining the arguments across all input - configuration files to parameterize. Each argument to be - parametrized should be a key in this dictionary, and the - value should be a list of the parameter values to run for - this argument. + configuration files to parameterize. Each argument + to be parametrized should be a key in this + dictionary, and the value should be a list of the + parameter values to run for this argument. - .. tabs:: + {batch_args_dict} + + Remember that the keys in the ``args`` dictionary + should be part of (at least) one of your other + configuration files. + files : list + A list of paths to the configuration files that + contain the arguments to be updated for every + parametric run. Arguments can be spread out over + multiple files. {batch_files} + set_tag : str, optional + Optional string defining a set tag that will prefix + each job tag for this set. This tag does not need to + include an underscore, as that is provided during + concatenation. + + +""" +_BATCH_ARGS_DICT = """.. tabs:: .. group-tab:: JSON :: @@ -199,8 +183,8 @@ {sample_toml_args_dict} - This example would run a total of six pipelines, one with - each of the following arg combinations: + This example would run a total of six pipelines, one + with each of the following arg combinations: :: input_constant_1=18.20, path_to_a_file="/first/path.h5" @@ -211,12 +195,8 @@ input_constant_1=19.04, path_to_a_file="/third/path.h5" - Remember that the keys in the ``args`` dictionary should be - part of (at least) one of your other configuration files. - files : list - A list of paths to the configuration files that contain the - arguments to be updated for every parametric run. Arguments - can be spread out over multiple files. For example: +""" +_BATCH_FILES = """For example: .. tabs:: @@ -236,16 +216,16 @@ {sample_toml_files} - set_tag : str, optional - Optional string defining a set tag that will prefix each - job tag for this set. This tag does not need to include an - underscore, as that is provided during concatenation. - - """ CONFIG_DOC = """ -Path to the ``{name}`` configuration file. Below is a sample template config +Path to the ``{name}`` configuration file. {sample_config} + +{docstring} + +Note that you may remove any keys with a ``null`` value if you do not intend to update them yourself. +""" +SAMPLE_CONFIG_DOC = """Below is a sample template config .. tabs:: @@ -264,10 +244,6 @@ {template_toml_config} - -{docstring} - -Note that you may remove any keys with a ``null`` value if you do not intend to update them yourself. """ COMMAND_DOC = """ Execute the ``{name}`` step from a config file. @@ -286,29 +262,38 @@ arguments are: :option: ({{'local', 'kestrel', 'eagle', 'peregrine'}}) - Hardware run option. Determines the type of job scheduler - tp use as well as the base AU cost. - :allocation: (str) HPC project (allocation) handle. - :walltime: (int) Node walltime request in hours. - :qos: (str, optional) Quality-of-service specifier. 
On Eagle or - Kestrel, this should be one of {{'standby', 'normal', 'high'}}. - Note that 'high' priority doubles the AU cost. By default, - ``"normal"``. - :memory: (int, optional) Node memory request in GB. By default, - ``None``, which does not specify a memory limit.{n}{eep} - :queue: (str, optional; PBS ONLY) HPC queue to submit job to. - Examples include: 'debug', 'short', 'batch', 'batch-h', - 'long', etc. By default, ``None``, which uses "test_queue". - :feature: (str, optional) Additional flags for SLURM job - (e.g. "-p debug"). By default, ``None``, which does not - specify any additional flags. - :conda_env: (str, optional) Name of conda environment to activate. - By default, ``None``, which does not load any environments. - :module: (str, optional) Module to load. By default, ``None``, which - does not load any modules. - :sh_script: (str, optional) Extra shell script to run before - command call. By default, ``None``, which does not run any - scripts. + Hardware run option. Determines the type of job + scheduler to use as well as the base AU cost. + :allocation: (str) + HPC project (allocation) handle. + :walltime: (int) + Node walltime request in hours. + :qos: (str, optional) + Quality-of-service specifier. On Eagle or + Kestrel, this should be one of {{'standby', 'normal', + 'high'}}. Note that 'high' priority doubles the AU + cost. By default, ``"normal"``. + :memory: (int, optional) + Node memory request in GB. By default, ``None``, which + does not specify a memory limit.{n}{eep} + :queue: (str, optional; PBS ONLY) + HPC queue to submit job to. Examples include: 'debug', + 'short', 'batch', 'batch-h', 'long', etc. + By default, ``None``, which uses "test_queue". + :feature: (str, optional) + Additional flags for SLURM job (e.g. "-p debug"). + By default, ``None``, which does not specify any + additional flags. + :conda_env: (str, optional) + Name of conda environment to activate. By default, + ``None``, which does not load any environments. + :module: (str, optional) + Module to load. By default, ``None``, which does not + load any modules. + :sh_script: (str, optional) + Extra shell script to run before command call. + By default, ``None``, which does not run any + scripts. Only the `option` key is required for local execution. For execution on the HPC, the `allocation` and `walltime` keys are also @@ -326,12 +311,13 @@ """ NODES_DOC = ( - "\n :nodes: (int, optional) Number of nodes to split the project " - "\n points across. Note that the total number of requested " - "\n nodes for a job may be larger than this value if the " - "\n command splits across other inputs. Default is ``1``." + "\n :nodes: (int, optional)" + "\n Number of nodes to split the project points across. " + "\n Note that the total number of requested nodes for " + "\n a job may be larger than this value if the command" + "\n splits across other inputs. Default is ``1``." ) -EXTRA_EXEC_PARAM_DOC = "\n :{name}: ({type}) {desc}" +EXTRA_EXEC_PARAM_DOC = "\n :{name}: ({type})\n {desc}" class CommandDocumentation: @@ -484,7 +470,7 @@ def parameter_help(self): for p in doc["Parameters"] if p.name in self.template_config ] - return str(param_only_doc) + return "\n".join(_format_lines(str(param_only_doc).split("\n"))) @property def extended_summary(self): @@ -507,19 +493,27 @@ def config_help(self, command_name): str Help string for the config file. 
""" - return CONFIG_DOC.format( + if _is_sphinx_build(): + sample_config = SAMPLE_CONFIG_DOC.format( + template_json_config=config_as_str_for_docstring( + self.template_config, config_type=ConfigType.JSON + ), + template_yaml_config=config_as_str_for_docstring( + self.template_config, config_type=ConfigType.YAML + ), + template_toml_config=config_as_str_for_docstring( + self.template_config, config_type=ConfigType.TOML + ), + ) + else: + sample_config = "" + + doc = CONFIG_DOC.format( name=command_name, - template_json_config=config_as_str_for_docstring( - self.template_config, config_type=ConfigType.JSON - ), - template_yaml_config=config_as_str_for_docstring( - self.template_config, config_type=ConfigType.YAML - ), - template_toml_config=config_as_str_for_docstring( - self.template_config, config_type=ConfigType.TOML - ), + sample_config=sample_config, docstring=self.parameter_help, ) + return _cli_formatted(doc) def command_help(self, command_name): """Generate a help string for a command. @@ -535,9 +529,8 @@ def command_help(self, command_name): str Help string for the command. """ - return COMMAND_DOC.format( - name=command_name, desc=self.extended_summary - ) + doc = COMMAND_DOC.format(name=command_name, desc=self.extended_summary) + return _cli_formatted(doc) @lru_cache(maxsize=16) def _param_value(self, param): @@ -582,23 +575,32 @@ def _main_command_help(prog_name, commands): return MAIN_DOC.format(name=prog_name, cli_help_str=cli_help_str) -def _pipeline_command_help(pipeline_config): +def _pipeline_command_help(pipeline_config): # pragma: no cover """Generate pipeline command help from a sample config.""" - format_inputs = {} + if not _is_sphinx_build(): + return _cli_formatted(PIPELINE_CONFIG_DOC.format(sample_config="")) + template_names = [ "template_json_config", "template_yaml_config", "template_toml_config", ] - for name, c_type in zip(template_names, CONFIG_TYPES): - format_inputs[name] = config_as_str_for_docstring( - pipeline_config, config_type=c_type - ) - return PIPELINE_CONFIG_DOC.format(**format_inputs) + sample_config = SAMPLE_CONFIG_DOC.format( + **_format_dict(pipeline_config, template_names) + ) + doc = PIPELINE_CONFIG_DOC.format(sample_config=sample_config) + return _cli_formatted(doc) -def _batch_command_help(): +def _batch_command_help(): # pragma: no cover """Generate batch command help from a sample config.""" + if not _is_sphinx_build(): + doc = BATCH_CONFIG_DOC.format( + sample_config="", batch_args_dict="", batch_files="" + ) + return _cli_formatted(doc) + + format_inputs = {} template_config = { "pipeline_config": CommandDocumentation.REQUIRED_TAG, "sets": [ @@ -614,6 +616,16 @@ def _batch_command_help(): }, ], } + template_names = [ + "template_json_config", + "template_yaml_config", + "template_toml_config", + ] + + format_inputs["sample_config"] = SAMPLE_CONFIG_DOC.format( + **_format_dict(template_config, template_names) + ) + sample_args_dict = { "args": { "input_constant_1": [18.02, 19.04], @@ -624,56 +636,88 @@ def _batch_command_help(): ], } } - sample_files = {"files": ["./config_run.yaml", "./config_analyze.json"]} - format_inputs = {} - template_names = [ - "template_json_config", - "template_yaml_config", - "template_toml_config", - ] - for name, c_type in zip(template_names, CONFIG_TYPES): - format_inputs[name] = config_as_str_for_docstring( - template_config, config_type=c_type - ) - sample_args_names = [ "sample_json_args_dict", "sample_yaml_args_dict", "sample_toml_args_dict", ] - for name, c_type in zip(sample_args_names, 
CONFIG_TYPES): - format_inputs[name] = config_as_str_for_docstring( - sample_args_dict, - config_type=c_type, - num_spaces=20 if "json" in name else 24, - ) - if "json" in name: - format_inputs[name] = "\n".join( - format_inputs[name].split("\n")[1:-1] - ).lstrip() + format_inputs["batch_args_dict"] = _BATCH_ARGS_DICT.format( + **_format_dict(sample_args_dict, sample_args_names, batch_docs=True) + ) + sample_files = {"files": ["./config_run.yaml", "./config_analyze.json"]} sample_files_names = [ "sample_json_files", "sample_yaml_files", "sample_toml_files", ] - for name, c_type in zip(sample_files_names, CONFIG_TYPES): - format_inputs[name] = config_as_str_for_docstring( - sample_files, + format_inputs["batch_files"] = _BATCH_FILES.format( + **_format_dict(sample_files, sample_files_names, batch_docs=True) + ) + + doc = BATCH_CONFIG_DOC.format(**format_inputs) + return _cli_formatted(doc) + + +def _format_dict(sample, names, batch_docs=False): # pragma: no cover + """Format a sample into a documentation config""" + configs = {} + for name, c_type in zip(names, CONFIG_TYPES): + configs[name] = config_as_str_for_docstring( + sample, config_type=c_type, - num_spaces=20 if "json" in name else 24, + num_spaces=(20 if "json" in name else 24) if batch_docs else 12, ) - if "json" in name: - format_inputs[name] = "\n".join( - format_inputs[name].split("\n")[1:-1] - ).lstrip() - - return BATCH_CONFIG_DOC.format(**format_inputs) + if batch_docs and "json" in name: + configs[name] = "\n".join(configs[name].split("\n")[1:-1]).lstrip() + return configs def _as_functions(functions): - """Yield items from input, converting all classes to their init funcs""" + """Yield items from input, converting all classes to their init methods""" for func in functions: if isclass(func): func = func.__init__ yield func + + +def _line_needs_newline(line): + """Determine wether a newline should be added to the current line""" + if any( + f":{key}:" in line + for key in chain(DEFAULT_EXEC_VALUES, EXTRA_EXEC_PARAMS) + ): + return True + if not line.startswith(" "): + return True + if len(line) <= 4: + return True + if line[4] == " ": + return True + return False + + +def _format_lines(lines): + """Format docs into longer lines of text for easier wrapping in CLI""" + new_lines = [lines[0]] + current_line = [] + for line in lines[1:]: + if _line_needs_newline(line): + current_line = " ".join(current_line) + if current_line: + line = "\n".join([current_line, line]) + new_lines.append(line) + current_line = [] + else: + if current_line: + line = line.lstrip() + current_line.append(line) + + return new_lines + + +def _cli_formatted(doc): + """Apply minor formatting changes for strings when displaying to CLI""" + if not _is_sphinx_build(): + doc = doc.replace("``", "`").replace("{{", "{").replace("}}", "}") + return doc diff --git a/tests/cli/test_cli_documentation.py b/tests/cli/test_cli_documentation.py index 0cb6e03f..668a5b66 100644 --- a/tests/cli/test_cli_documentation.py +++ b/tests/cli/test_cli_documentation.py @@ -9,6 +9,7 @@ import pytest +import gaps.cli.documentation from gaps.cli.documentation import ( DEFAULT_EXEC_VALUES, EXTRA_EXEC_PARAMS, @@ -36,7 +37,13 @@ def test_command_documentation_copies_skip_params(): def test_command_documentation_extra_exec_params(): """Test the `CommandDocumentation` with extra exec params.""" - def func(max_workers, sites_per_worker, memory_utilization_limit, timeout, pool_size): + def func( + max_workers, + sites_per_worker, + memory_utilization_limit, + timeout, + pool_size, + ): 
"""A short description. Parameters @@ -86,7 +93,13 @@ def func(max_workers, sites_per_worker, memory_utilization_limit, timeout, pool_ def test_command_documentation_extra_exec_params_no_user_doc(): """Test the `CommandDocumentation` with extra exec params no user doc.""" - def func(max_workers, sites_per_worker, memory_utilization_limit, timeout, pool_size): + def func( + max_workers, + sites_per_worker, + memory_utilization_limit, + timeout, + pool_size, + ): """A short description.""" expected_parameters = [ @@ -370,16 +383,23 @@ def func(): assert doc.extended_summary == expected_str -def test_command_documentation_config_help(): +def test_command_documentation_config_help(monkeypatch): """Test `CommandDocumentation.config_help`.""" + monkeypatch.setattr( + gaps.cli.documentation, "_is_sphinx_build", lambda: True, raising=True + ) + doc = CommandDocumentation(func_no_args, is_split_spatially=True) config_help = doc.config_help(command_name="my_command_name") assert "my_command_name" in config_help - assert doc.parameter_help in config_help + assert ( + gaps.cli.documentation._cli_formatted(doc.parameter_help) + in config_help + ) assert ".. tabs::" in config_help - assert ".. tab::" in config_help + assert ".. group-tab::" in config_help def test_command_documentation_command_help(): From ddee6aeea854d00e5ef803e76f00f5e4457d666f Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 10:46:26 -0600 Subject: [PATCH 50/61] Minor formatting --- gaps/cli/documentation.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 0cf6974b..e3d06ee1 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -30,17 +30,17 @@ EXTRA_EXEC_PARAMS = { "max_workers": """Maximum number of parallel workers run on each node.""", "sites_per_worker": """Number of sites to run in series on a worker.""", - "memory_utilization_limit": """Memory utilization limit (fractional). Must be a value - between 0 and 100. This input sets how much data will - be stored in-memory at any given time before flushing - to disk. - """, + "memory_utilization_limit": """Memory utilization limit (fractional). + Must be a value between 0 and 100. This input sets + how much data will be stored in-memory at any + given time before flushing to disk. + """, "timeout": """Number of seconds to wait for parallel run iteration to complete before early termination. - """, + """, "pool_size": """Number of futures to submit to a single process pool for parallel futures. 
- """, + """, } CONFIG_TYPES = [ From bb6014a9b45096e9001ebf39c2ca872da8c4b33c Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 11:02:45 -0600 Subject: [PATCH 51/61] Add zfill fix --- gaps/cli/config.py | 2 +- tests/cli/test_cli_config.py | 50 ++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/gaps/cli/config.py b/gaps/cli/config.py index d3b7f33c..85b35c2f 100644 --- a/gaps/cli/config.py +++ b/gaps/cli/config.py @@ -228,7 +228,7 @@ def kickoff_jobs(self): jobs = sorted(product(*lists_to_run)) num_jobs_submit = len(jobs) self._warn_about_excessive_au_usage(num_jobs_submit) - n_zfill = len(str(num_jobs_submit)) + n_zfill = len(str(max(0, num_jobs_submit - 1))) extra_exec_args = {} for param in EXTRA_EXEC_PARAMS: if param in self.exec_kwargs: diff --git a/tests/cli/test_cli_config.py b/tests/cli/test_cli_config.py index a39ff673..82d29072 100644 --- a/tests/cli/test_cli_config.py +++ b/tests/cli/test_cli_config.py @@ -379,6 +379,56 @@ def test_run_multiple_nodes( ) +@pytest.mark.parametrize("test_class", [False, True]) +def test_run_multiple_nodes_correct_zfill( + test_ctx, runnable_script, monkeypatch, test_class, job_names_cache +): + """Test the `run` function calls `_kickoff_hpc_job` for multiple nodes.""" + + tmp_path = test_ctx.obj["TMP_PATH"] + + if test_class: + command_config = CLICommandFromClass( + TestCommand, + "run", + name="run", + split_keys={"project_points", "_z_0"}, + ) + else: + command_config = CLICommandFromFunction( + _testing_function, + name="run", + split_keys={"project_points", "_z_0"}, + ) + + config = { + "execution_control": { + "option": "eagle", + "allocation": "test", + "walltime": 1, + "nodes": 5, + "max_workers": 1, + }, + "input1": 1, + "input2": 7, + "input3": 8, + "_z_0": ["unsorted", "strings"], + "project_points": [0, 1, 2, 4, 5, 6, 7, 8, 9], + } + + config_fp = tmp_path / "config.json" + with open(config_fp, "w") as config_file: + json.dump(config, config_file) + + assert len(job_names_cache) == 0 + from_config(config_fp, command_config) + assert len(job_names_cache) == 10 + assert len(set(job_names_cache)) == 10 + + assert not any("j00" in job_name for job_name in job_names_cache) + assert any("j0" in job_name for job_name in job_names_cache) + + @pytest.mark.parametrize("test_class", [False, True]) def test_run_no_split_keys( test_ctx, runnable_script, monkeypatch, test_class, job_names_cache From 31ecfb81a6535f5ae78540c3b8cfd74838bcc145 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 12:17:02 -0600 Subject: [PATCH 52/61] Add support for JSON5 --- gaps/cli/documentation.py | 8 ++++---- gaps/config.py | 43 +++++++++++++++++++++++++++++++++++++++ requirements.txt | 3 +++ 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index e3d06ee1..df1a6a52 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -59,7 +59,7 @@ $ {name} template-configs By default, this generates template JSON configuration files, though you -can request YAML or TOML configuration files instead. You can run +can request JSON5, YAML, or TOML configuration files instead. You can run ``$ {name} template-configs --help`` on the command line to see all available options for the ``template-configs`` command. Once the template configuration files have been generated, you can fill them out by referring to the @@ -167,7 +167,7 @@ """ _BATCH_ARGS_DICT = """.. tabs:: - .. group-tab:: JSON + .. 
group-tab:: JSON/JSON5 :: {sample_json_args_dict} @@ -200,7 +200,7 @@ .. tabs:: - .. group-tab:: JSON + .. group-tab:: JSON/JSON5 :: {sample_json_files} @@ -229,7 +229,7 @@ .. tabs:: - .. group-tab:: JSON + .. group-tab:: JSON/JSON5 :: {template_json_config} diff --git a/gaps/config.py b/gaps/config.py index 7fe73598..a5a6d397 100644 --- a/gaps/config.py +++ b/gaps/config.py @@ -9,6 +9,7 @@ import json import yaml import toml +import pyjson5 from gaps.log import init_logger from gaps.utilities import CaseInsensitiveEnum, resolve_path @@ -17,6 +18,18 @@ _CONFIG_HANDLER_REGISTRY = {} +# pylint: disable=too-few-public-methods +class _JSON5Formatter: + """Format input JSON5 data with indentation.""" + + def __init__(self, data): + self.data = data + + def _format_as_json(self): + """Format the data input with as string with indentation.""" + return json.dumps(self.data, indent=4) + + class Handler(ABC): """ABC for configuration file handler.""" @@ -84,6 +97,36 @@ def loads(cls, config_str): return json.loads(config_str) +# pylint: disable=no-member +class JSON5Handler(Handler): + """JSON5 config file handler.""" + + FILE_EXTENSION = "json5" + + @classmethod + def dump(cls, config, stream): + """Write the config to a stream (JSON5 file).""" + return pyjson5.encode_io( + _JSON5Formatter(config), + stream, + supply_bytes=False, + tojson="_format_as_json", + ) + + @classmethod + def dumps(cls, config): + """Convert the config to a JSON5 string.""" + return pyjson5.encode( + _JSON5Formatter(config), + tojson="_format_as_json", + ) + + @classmethod + def loads(cls, config_str): + """Parse the JSON5 string into a config dictionary.""" + return pyjson5.decode(config_str, maxdepth=-1) + + class YAMLHandler(Handler): """YAML config file handler.""" diff --git a/requirements.txt b/requirements.txt index 4aa083b2..325f0e5f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,9 +5,12 @@ numpy>=1.24.2 numpydoc>=1.5.0 pandas>=2.0.0 psutil>=5.9.2 +pyjson5>=1.6.4 +pyyaml>=6.0 setuptools>=65.4.0 sphinx>=5.3.0 sphinx-copybutton>=0.5.1 sphinx-click>=4.4.0 sphinx-tabs>=3.4.1 tabulate>=0.9.0 +toml>=0.10.2 From d19d24d9c7ff3e19a260ca6ce786c3b5bd83e542 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 12:35:04 -0600 Subject: [PATCH 53/61] Add GHA to run reV CLI tests --- .github/workflows/pr_rev_cli_tests.yml | 40 ++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 .github/workflows/pr_rev_cli_tests.yml diff --git a/.github/workflows/pr_rev_cli_tests.yml b/.github/workflows/pr_rev_cli_tests.yml new file mode 100644 index 00000000..ee0cbc9d --- /dev/null +++ b/.github/workflows/pr_rev_cli_tests.yml @@ -0,0 +1,40 @@ +name: reV CLI Tests + +on: pull_request + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: checkout gaps + uses: actions/checkout@v2 + with: + ref: ${{ github.event.pull_request.head.ref }} + fetch-depth: 1 + path: gaps + - name: checkout reV + uses: actions/checkout@v2 + with: + repository: nrel/reV + fetch-depth: 1 + path: reV + - name: Set up Python + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: 3.9 + - name: Install reV + working-directory: ./reV + shell: bash -l {0} + run: | + pip install -e . + - name: Install gaps + working-directory: ./gaps + shell: bash -l {0} + run: | + pip install -e . 
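+      # gaps is installed last (editable) so this checkout overrides the gaps
+      # release pulled in by reV's own requirements; the reV CLI tests in the
+      # next step therefore run against the PR's gaps code.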
+ - name: Run reV CLI tests + working-directory: ./reV + shell: bash -l {0} + run: | + pytest -k cli -v --disable-warnings From da5fc8dc3212791460d0d3332d41c0ef6154535d Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 14:27:30 -0600 Subject: [PATCH 54/61] Expanded collection tests slightly --- tests/cli/test_cli.py | 47 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index f06b4a03..2055dbd1 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -40,6 +40,23 @@ def runnable_script(): _CMD_LIST.pop(0) +def _copy_single_file( + project_points, source_dir, dest_dir, file_pattern, max_workers +): + """Test function that copies over data files.""" + time.sleep(3) + assert project_points.gids == PROJECT_POINTS + assert max_workers == MAX_WORKERS + out_files = [] + for in_file in sorted(Path(source_dir).glob(file_pattern)): + out_file_name = file_pattern.replace("*", "") + out_file = str(Path(dest_dir) / out_file_name) + shutil.copy(in_file, out_file) + out_files.append(out_file) + break + return out_files + + def _copy_files( project_points, source_dir, dest_dir, file_pattern, max_workers ): @@ -100,17 +117,20 @@ def test_make_cli(): @pytest.mark.integration +@pytest.mark.parametrize("test_single_file", [True, False]) def test_cli( tmp_cwd, cli_runner, collect_pattern, manual_collect, runnable_script, + test_single_file, assert_message_was_logged, ): """Integration test of `make_cli`""" data_dir, file_pattern = collect_pattern + run_func = _copy_single_file if test_single_file else _copy_files def preprocess_run_config(config, project_dir, out_dir): assert project_dir == out_dir @@ -121,7 +141,7 @@ def preprocess_run_config(config, project_dir, out_dir): commands = [ CLICommandFromFunction( - _copy_files, + run_func, name="run", add_collect=True, split_keys=["project_points"], @@ -163,7 +183,7 @@ def preprocess_run_config(config, project_dir, out_dir): main, ["pipeline", "-c", pipe_config_fp.as_posix()] ) assert len(set((tmp_cwd / "logs").glob("*run*"))) == 1 - assert len(set(tmp_cwd.glob(file_pattern))) == 4 + assert len(set(tmp_cwd.glob(file_pattern))) == 1 if test_single_file else 4 assert tmp_cwd / "logs" in set(tmp_cwd.glob("*")) result = cli_runner.invoke(main, ["status"]) @@ -183,6 +203,12 @@ def preprocess_run_config(config, project_dir, out_dir): string in lines[-ind] for string in partial.split() ), err_msg + with open(collect_config_fp, "r") as config_file: + config = json.load(config_file) + config["collect_pattern"] = {"out.h5": "./*.h5"} + with open(collect_config_fp, "w") as config_file: + json.dump(config, config_file) + assert tmp_cwd / "chunk_files" not in set(tmp_cwd.glob("*")) result = cli_runner.invoke( main, @@ -199,14 +225,15 @@ def preprocess_run_config(config, project_dir, out_dir): h5_files = set(tmp_cwd.glob("*.h5")) assert len(h5_files) == 1 - with h5py.File(h5_files.pop(), "r") as collected_outputs: - assert len(collected_outputs.keys()) == 5 - assert "cf_mean" in collected_outputs - assert "lcoe_fcr" in collected_outputs - cf_profiles = collected_outputs["cf_profile"][...] + if not test_single_file: + with h5py.File(h5_files.pop(), "r") as collected_outputs: + assert len(collected_outputs.keys()) == 5 + assert "cf_mean" in collected_outputs + assert "lcoe_fcr" in collected_outputs + cf_profiles = collected_outputs["cf_profile"][...] 
- profiles = manual_collect(data_dir / file_pattern, "cf_profile") - assert np.allclose(profiles, cf_profiles) + profiles = manual_collect(data_dir / file_pattern, "cf_profile") + assert np.allclose(profiles, cf_profiles) result = cli_runner.invoke( main, ["pipeline", "-c", pipe_config_fp.as_posix()] @@ -233,7 +260,7 @@ def preprocess_run_config(config, project_dir, out_dir): assert project_dir == out_dir config["dest_dir"] = str(project_dir) config["source_dir"] = str(data_dir) - config["file_pattern"] = file_pattern + config["file_pattern"] = f"./{file_pattern}" return config commands = [ From aba2f9c18e30a1c6c304028cc1891916a71a26eb Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 18:23:23 -0600 Subject: [PATCH 55/61] Updated status output --- gaps/cli/status.py | 304 +++++++++++++++++++++++++---------- gaps/status.py | 2 +- tests/cli/test_cli.py | 178 ++++++++++---------- tests/cli/test_cli_status.py | 54 +++++-- 4 files changed, 346 insertions(+), 192 deletions(-) diff --git a/gaps/cli/status.py b/gaps/cli/status.py index 83f3d896..e7e05834 100644 --- a/gaps/cli/status.py +++ b/gaps/cli/status.py @@ -9,7 +9,7 @@ import psutil import pandas as pd from colorama import init, Fore, Style -from tabulate import tabulate +from tabulate import tabulate, SEPARATING_LINE from gaps.status import ( DT_FMT, @@ -24,6 +24,7 @@ from gaps.cli.command import _WrappedCommand +JOB_STATUS_COL = StatusField.JOB_STATUS.value FAILURE_STRINGS = ["failure", "fail", "failed", "f"] RUNNING_STRINGS = ["running", "run", "r"] SUBMITTED_STRINGS = ["submitted", "submit", "sb", "pending", "pend", "p"] @@ -89,20 +90,79 @@ def _filter_df_for_status(df, status_request): return df.copy() -def _color_print(df, print_folder, commands, status): - """Color the status portion of the print out.""" +def _calculate_runtime_stats(df): + """Calculate df runtime statistics""" + + run_times_seconds = df[StatusField.RUNTIME_SECONDS] + if run_times_seconds.isna().any(): + return pd.DataFrame() + runtime_stats = pd.DataFrame(run_times_seconds.astype(float).describe()) + runtime_stats = runtime_stats.T[["min", "mean", "50%", "max"]] + runtime_stats = runtime_stats.rename(columns={"50%": "median"}) + # runtime_stats = runtime_stats.rename( + # columns={ + # col: f"{Fore.MAGENTA}{col}{Style.RESET_ALL}" + # for col in runtime_stats.columns + # } + # ) + runtime_stats.index = ["Node runtime"] + runtime_stats = runtime_stats.T + runtime_stats["Node runtime"] = runtime_stats["Node runtime"].apply( + _elapsed_time_as_str + ) + return pd.DataFrame(runtime_stats).reset_index() - def color_string(string): - if string == StatusOption.FAILED: - string = f"{Fore.RED}{string}{Style.RESET_ALL}" - elif string == StatusOption.SUCCESSFUL: - string = f"{Fore.GREEN}{string}{Style.RESET_ALL}" - elif string == StatusOption.RUNNING: - string = f"{Fore.BLUE}{string}{Style.RESET_ALL}" - else: - string = f"{Fore.YELLOW}{string}{Style.RESET_ALL}" - return string +def _calculate_walltime(df): + """Calculate total project walltime""" + all_jobs_failed = (df[StatusField.JOB_STATUS] == StatusOption.FAILED).all() + all_end_times_missing = df[StatusField.TIME_END].isna().all() + if all_jobs_failed and all_end_times_missing: + return 0 + + start_time = df[StatusField.TIME_SUBMITTED].fillna( + dt.datetime.now().strftime(DT_FMT) + ) + start_time = pd.to_datetime(start_time, format=DT_FMT).min() + + end_time = df[StatusField.TIME_END].fillna( + dt.datetime.now().strftime(DT_FMT) + ) + end_time = pd.to_datetime(end_time, format=DT_FMT).max() + return (end_time - 
start_time).total_seconds() + + +def _calculate_aus(df): + """Calculate the number of AU's spent by jobs""" + run_times_seconds = df[StatusField.RUNTIME_SECONDS] + + hardware_charge_factors = df[StatusField.HARDWARE].apply( + _extract_qos_charge_factor, enum_class=HardwareOption + ) + qos_charge_factors = df[StatusField.QOS].apply( + _extract_qos_charge_factor, enum_class=QOSOption + ) + aus_used = ( + run_times_seconds / 3600 * hardware_charge_factors * qos_charge_factors + ) + return int(aus_used.sum()) + + +def _color_string(string): + """Color string value based on status option""" + if string == StatusOption.FAILED: + string = f"{Fore.RED}{string}{Style.RESET_ALL}" + elif string == StatusOption.SUCCESSFUL: + string = f"{Fore.GREEN}{string}{Style.RESET_ALL}" + elif string == StatusOption.RUNNING: + string = f"{Fore.BLUE}{string}{Style.RESET_ALL}" + else: + string = f"{Fore.YELLOW}{string}{Style.RESET_ALL}" + return string + + +def _print_intro(print_folder, commands, status, monitor_pid): + """Print intro including project folder and command/status filters.""" extras = [] commands = ", ".join(commands or []) if commands: @@ -116,52 +176,127 @@ def color_string(string): else: extras = "" - pid = df.monitor_pid - total_runtime_seconds = df.total_runtime_seconds - total_aus_used = int(df.total_aus_used) - walltime = df.walltime - name = f"\n{Fore.CYAN}{print_folder}{extras}{Style.RESET_ALL}:" - job_status = StatusField.JOB_STATUS - df[job_status.value] = df[job_status].apply(color_string) + print(f"\n{Fore.CYAN}{print_folder}{extras}{Style.RESET_ALL}:") + if monitor_pid is not None and psutil.pid_exists(monitor_pid): + print(f"{Fore.MAGENTA}MONITOR PID: {monitor_pid}{Style.RESET_ALL}") + + +def _print_df(df): + """Print main status body (table of job statuses)""" + df[JOB_STATUS_COL] = df[JOB_STATUS_COL].apply(_color_string) df = df.fillna("--") + + pdf = pd.concat([df, pd.DataFrame({JOB_STATUS_COL: [SEPARATING_LINE]})]) pdf = tabulate( - df, + pdf, showindex=True, # cspell:disable-line headers=df.columns, tablefmt="simple", # cspell:disable-line disable_numparse=[3], # cspell:disable-line ) - print(name) - if pid is not None and psutil.pid_exists(pid): - print(f"{Fore.MAGENTA}MONITOR PID: {pid}{Style.RESET_ALL}") + pdf = pdf.split("\n") + pdf[-1] = pdf[-1].replace(" ", "-") + pdf = "\n".join(pdf) print(pdf) - if total_runtime_seconds is not None: - runtime_str = ( - f"Total Runtime: {_elapsed_time_as_str(total_runtime_seconds)}" - ) - # TODO: make prettier - # divider = "".join(["-"] * len(runtime_str)) - # divider = "".join(["~"] * 3) - divider = "" - print(divider) - print(f"{Style.BRIGHT}{runtime_str}{Style.RESET_ALL}") - if walltime > 2: - walltime_str = ( - f"Total project wall time (including queue and downtime): " - f"{_elapsed_time_as_str(walltime)}" - ) - print(f"{Style.BRIGHT}{walltime_str}{Style.RESET_ALL}") - if total_aus_used > 0: - au_str = f"Total AUs spent: {total_aus_used:,}" - print(f"{Style.BRIGHT}{au_str}{Style.RESET_ALL}") - print( - f"{Style.BRIGHT}**Statistics only include shown jobs (excluding " - f"any previous runs)**{Style.RESET_ALL}" - ) - print() -def main_monitor(folder, commands, status, include): +def _print_job_status_statistics(df): + """Print job status statistics.""" + statistics_str = f"Total number of jobs: {df.shape[0]}" + counts = pd.DataFrame(df[JOB_STATUS_COL].value_counts()).reset_index() + counts = tabulate( + # pylint: disable=unsubscriptable-object + counts[["count", JOB_STATUS_COL]].values, + showindex=False, # cspell:disable-line + 
headers=counts.columns, + tablefmt="simple", # cspell:disable-line + disable_numparse=[1], # cspell:disable-line + ) + counts = "\n".join( + [f" {substr.lstrip()}" for substr in counts.split("\n")[2:]] + ) + print(f"{Style.BRIGHT}{statistics_str}{Style.RESET_ALL}") + print(counts) + + +def _print_runtime_stats(runtime_stats, total_runtime_seconds): + """Print node runtime statistics""" + + runtime_str = ( + f"Total node runtime: " + f"{_elapsed_time_as_str(total_runtime_seconds)}" + ) + print(f"{Style.BRIGHT}{runtime_str}{Style.RESET_ALL}") + if runtime_stats.empty: + return + + runtime_stats = tabulate( + runtime_stats, + showindex=False, # cspell:disable-line + headers=runtime_stats.columns, + tablefmt="simple", # cspell:disable-line + ) + runtime_stats = "\n".join( + [f" {substr}" for substr in runtime_stats.split("\n")[2:]] + ) + print(runtime_stats) + + +def _print_au_usage(total_aus_used): + """Print the job AU usage.""" + if total_aus_used <= 0: + return + + au_str = f"Total AUs spent: {total_aus_used:,}" + print(f"{Style.BRIGHT}{au_str}{Style.RESET_ALL}") + + +def _print_total_walltime(walltime): + """Print the total project walltime.""" + if walltime <= 2: + return + walltime_str = ( + f"Total project wall time (including queue and downtime " + f"between steps): {_elapsed_time_as_str(walltime)}" + ) + print(f"{Style.BRIGHT}{walltime_str}{Style.RESET_ALL}") + + +def _print_disclaimer(): + """Print disclaimer about statistics.""" + print( + f"{Style.BRIGHT}**Statistics only include shown jobs (excluding " + f"any previous runs or other steps)**{Style.RESET_ALL}" + ) + + +def _color_print( + df, + print_folder, + commands, + status, + walltime, + runtime_stats, + total_aus_used, + monitor_pid=None, + total_runtime_seconds=None, +): + """Color the status portion of the print out.""" + + _print_intro(print_folder, commands, status, monitor_pid) + _print_df(df) + _print_job_status_statistics(df) + + if total_runtime_seconds is None: + return + + _print_runtime_stats(runtime_stats, total_runtime_seconds) + _print_au_usage(total_aus_used) + _print_total_walltime(walltime) + _print_disclaimer() + + +def main_monitor(folder, commands, status, include, recursive): """Run the appropriate monitor functions for a folder. Parameters @@ -174,16 +309,22 @@ def main_monitor(folder, commands, status, include): Container with the statuses to display. include : container of str Container of extra keys to pull from the status files. + recursive : bool + Option to perform recursive search of directories. """ init() - folder = Path(folder).expanduser().resolve() - for directory in chain([folder], folder.rglob("*")): + folders = [Path(folder).expanduser().resolve()] + if recursive: + folders = chain(folders, folders[0].rglob("*")) + + for directory in folders: if not directory.is_dir(): continue pipe_status = Status(directory) if not pipe_status: + print(f"No non-empty status file found in {str(directory)!r}. 
") continue include_with_runtime = list(include) + [StatusField.RUNTIME_SECONDS] @@ -193,42 +334,28 @@ def main_monitor(folder, commands, status, include): if status: df = _filter_df_for_status(df, status) - run_times_seconds = df[StatusField.RUNTIME_SECONDS] - hardware_charge_factors = df[StatusField.HARDWARE].apply( - _extract_qos_charge_factor, enum_class=HardwareOption - ) - qos_charge_factors = df[StatusField.QOS].apply( - _extract_qos_charge_factor, enum_class=QOSOption - ) - aus_used = ( - run_times_seconds - / 3600 - * hardware_charge_factors - * qos_charge_factors - ) - df = df[list(df.columns)[:-1]] - df.monitor_pid = pipe_status.get(StatusField.MONITOR_PID) - df.total_runtime_seconds = run_times_seconds.sum() - df.total_aus_used = aus_used.sum() - - all_jobs_failed = ( - df[StatusField.JOB_STATUS] == StatusOption.FAILED - ).all() - all_end_times_missing = df[StatusField.TIME_END].isna().all() - if all_jobs_failed and all_end_times_missing: - df.walltime = 0 - else: - start_time = df[StatusField.TIME_SUBMITTED].fillna( - dt.datetime.now().strftime(DT_FMT) + if df.empty: + print( + f"No status data found to display for {str(directory)!r}. " + "Please check your filters and try again." ) - start_time = pd.to_datetime(start_time, format=DT_FMT).min() + continue - end_time = df[StatusField.TIME_END].fillna( - dt.datetime.now().strftime(DT_FMT) - ) - end_time = pd.to_datetime(end_time, format=DT_FMT).max() - df.walltime = (end_time - start_time).total_seconds() - _color_print(df, directory.name, commands, status) + runtime_stats = _calculate_runtime_stats(df) + aus_used = _calculate_aus(df) + walltime = _calculate_walltime(df) + + _color_print( + df[list(df.columns)[:-1]].copy(), + directory.name, + commands, + status, + walltime, + runtime_stats, + total_aus_used=aus_used, + monitor_pid=pipe_status.get(StatusField.MONITOR_PID), + total_runtime_seconds=df[StatusField.RUNTIME_SECONDS].sum(), + ) def status_command(): @@ -271,6 +398,13 @@ def status_command(): "this option (e.g. :code:`-i key1 -i key2 ...`) By default, no " "extra keys are displayed.", ), + click.Option( + param_decls=["--recursive", "-r"], + is_flag=True, + help="Option to perform a recursive search of directories " + "(starting with the input directory). The status of every nested " + "directory is reported.", + ), ] return _WrappedCommand( diff --git a/gaps/status.py b/gaps/status.py index 499627e4..e28c6866 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -761,7 +761,7 @@ def _elapsed_time_as_str(seconds_elapsed): hours, minutes = divmod(minutes, 60) time_str = f"{hours:d}:{minutes:02d}:{seconds:02d}" if days: - time_str = f"{days:d} day{'s' if abs(days) != 1 else ''}, {time_str}" + time_str = f"{days:,d} day{'s' if abs(days) != 1 else ''}, {time_str}" return time_str diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 2055dbd1..ed648e29 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -197,7 +197,7 @@ def preprocess_run_config(config, project_dir, out_dir): "collect-run not submitted", ] - for ind, partial in enumerate(expected_partial_lines[::-1], start=7): + for ind, partial in enumerate(expected_partial_lines[::-1], start=9): err_msg = f"{partial!r} not in {lines[-ind]!r}. 
All lines: {lines}" assert all( string in lines[-ind] for string in partial.split() @@ -243,94 +243,94 @@ def preprocess_run_config(config, project_dir, out_dir): assert_message_was_logged("is complete.", "INFO") -@pytest.mark.integration -def test_cli_monitor( - tmp_cwd, - cli_runner, - collect_pattern, - manual_collect, - runnable_script, - assert_message_was_logged, -): - """Integration test of `make_cli` with monitor""" - - data_dir, file_pattern = collect_pattern - - def preprocess_run_config(config, project_dir, out_dir): - assert project_dir == out_dir - config["dest_dir"] = str(project_dir) - config["source_dir"] = str(data_dir) - config["file_pattern"] = f"./{file_pattern}" - return config - - commands = [ - CLICommandFromFunction( - _copy_files, - name="run", - add_collect=True, - split_keys=["project_points"], - config_preprocessor=preprocess_run_config, - ) - ] - - main = make_cli(commands) - - assert not set(tmp_cwd.glob("*")) - cli_runner.invoke(main, ["template-configs"]) - files = set(tmp_cwd.glob("*")) - assert len(files) == 3 - for config_type in ["pipeline", "run", "collect_run"]: - assert tmp_cwd / f"config_{config_type}.json" in files - - pipe_config_fp = tmp_cwd / "config_pipeline.json" - run_config_fp = tmp_cwd / "config_run.json" - with open(run_config_fp, "r") as config_file: - config = json.load(config_file) - - assert config["project_points"] == CommandDocumentation.REQUIRED_TAG - exec_control = config["execution_control"] - assert exec_control["max_workers"] == CommandDocumentation.REQUIRED_TAG - assert exec_control["nodes"] == 1 - config["project_points"] = PROJECT_POINTS - config["execution_control"]["option"] = "local" - config["execution_control"]["max_workers"] = MAX_WORKERS - - with open(run_config_fp, "w") as config_file: - json.dump(config, config_file) - - assert not set(tmp_cwd.glob(file_pattern)) - assert tmp_cwd / "logs" not in set(tmp_cwd.glob("*")) - assert tmp_cwd / "chunk_files" not in set(tmp_cwd.glob("*")) - - cli_runner.invoke( - main, ["pipeline", "-c", pipe_config_fp.as_posix(), "--monitor"] - ) - assert len(set((tmp_cwd / "logs").glob("*run*"))) == 2 - assert len(set(tmp_cwd.glob(file_pattern))) == 1 - assert tmp_cwd / "logs" in set(tmp_cwd.glob("*")) - assert tmp_cwd / "chunk_files" in set(tmp_cwd.glob("*")) - assert len(set((tmp_cwd / "chunk_files").glob(file_pattern))) == 4 - - log_file = set((tmp_cwd / "logs").glob("*collect_run*")) - assert len(log_file) == 1 - - with open(log_file.pop(), "r") as log: - assert "DEBUG" not in log.read() - - h5_files = set(tmp_cwd.glob("*.h5")) - assert len(h5_files) == 1 - - with h5py.File(h5_files.pop(), "r") as collected_outputs: - assert len(collected_outputs.keys()) == 5 - assert "cf_mean" in collected_outputs - assert "lcoe_fcr" in collected_outputs - cf_profiles = collected_outputs["cf_profile"][...] 
- - profiles = manual_collect(data_dir / file_pattern, "cf_profile") - assert np.allclose(profiles, cf_profiles) - - assert_message_was_logged("Pipeline job", "INFO") - assert_message_was_logged("is complete.", "INFO") +# @pytest.mark.integration +# def test_cli_monitor( +# tmp_cwd, +# cli_runner, +# collect_pattern, +# manual_collect, +# runnable_script, +# assert_message_was_logged, +# ): +# """Integration test of `make_cli` with monitor""" + +# data_dir, file_pattern = collect_pattern + +# def preprocess_run_config(config, project_dir, out_dir): +# assert project_dir == out_dir +# config["dest_dir"] = str(project_dir) +# config["source_dir"] = str(data_dir) +# config["file_pattern"] = f"./{file_pattern}" +# return config + +# commands = [ +# CLICommandFromFunction( +# _copy_files, +# name="run", +# add_collect=True, +# split_keys=["project_points"], +# config_preprocessor=preprocess_run_config, +# ) +# ] + +# main = make_cli(commands) + +# assert not set(tmp_cwd.glob("*")) +# cli_runner.invoke(main, ["template-configs"]) +# files = set(tmp_cwd.glob("*")) +# assert len(files) == 3 +# for config_type in ["pipeline", "run", "collect_run"]: +# assert tmp_cwd / f"config_{config_type}.json" in files + +# pipe_config_fp = tmp_cwd / "config_pipeline.json" +# run_config_fp = tmp_cwd / "config_run.json" +# with open(run_config_fp, "r") as config_file: +# config = json.load(config_file) + +# assert config["project_points"] == CommandDocumentation.REQUIRED_TAG +# exec_control = config["execution_control"] +# assert exec_control["max_workers"] == CommandDocumentation.REQUIRED_TAG +# assert exec_control["nodes"] == 1 +# config["project_points"] = PROJECT_POINTS +# config["execution_control"]["option"] = "local" +# config["execution_control"]["max_workers"] = MAX_WORKERS + +# with open(run_config_fp, "w") as config_file: +# json.dump(config, config_file) + +# assert not set(tmp_cwd.glob(file_pattern)) +# assert tmp_cwd / "logs" not in set(tmp_cwd.glob("*")) +# assert tmp_cwd / "chunk_files" not in set(tmp_cwd.glob("*")) + +# cli_runner.invoke( +# main, ["pipeline", "-c", pipe_config_fp.as_posix(), "--monitor"] +# ) +# assert len(set((tmp_cwd / "logs").glob("*run*"))) == 2 +# assert len(set(tmp_cwd.glob(file_pattern))) == 1 +# assert tmp_cwd / "logs" in set(tmp_cwd.glob("*")) +# assert tmp_cwd / "chunk_files" in set(tmp_cwd.glob("*")) +# assert len(set((tmp_cwd / "chunk_files").glob(file_pattern))) == 4 + +# log_file = set((tmp_cwd / "logs").glob("*collect_run*")) +# assert len(log_file) == 1 + +# with open(log_file.pop(), "r") as log: +# assert "DEBUG" not in log.read() + +# h5_files = set(tmp_cwd.glob("*.h5")) +# assert len(h5_files) == 1 + +# with h5py.File(h5_files.pop(), "r") as collected_outputs: +# assert len(collected_outputs.keys()) == 5 +# assert "cf_mean" in collected_outputs +# assert "lcoe_fcr" in collected_outputs +# cf_profiles = collected_outputs["cf_profile"][...] 
+ +# profiles = manual_collect(data_dir / file_pattern, "cf_profile") +# assert np.allclose(profiles, cf_profiles) + +# assert_message_was_logged("Pipeline job", "INFO") +# assert_message_was_logged("is complete.", "INFO") @pytest.mark.skipif( diff --git a/tests/cli/test_cli_status.py b/tests/cli/test_cli_status.py index e0eb81d9..de70c11f 100644 --- a/tests/cli/test_cli_status.py +++ b/tests/cli/test_cli_status.py @@ -84,14 +84,14 @@ def test_status(test_data_dir, cli_runner, extra_args, monkeypatch): "gaps_test_run_j3 submitted local", "collect-run not submitted", ] - + start_ind = -(len(lines) - 1) for ind, partial in enumerate(expected_partial_lines): assert all( - string in lines[-16 + ind] for string in partial.split() + string in lines[start_ind + ind] for string in partial.split() ), partial - assert "Total Runtime" in lines[-6] - assert "Total project wall time" in lines[-5] + assert "Total node runtime" in lines[-5] + assert "Total project wall time" in lines[-3] assert lines[-4] == "Total AUs spent: 6" @@ -162,15 +162,16 @@ def test_status_with_hardware_check( "collect-run not submitted", ] + start_ind = -(len(lines) - 1) for ind, partial in enumerate(expected_partial_lines): assert all( - string in lines[-16 + ind] for string in partial.split() + string in lines[start_ind + ind] for string in partial.split() ), partial - if "failed" in lines[-16 + ind]: - assert not "(r)" in lines[-16 + ind] + if "failed" in lines[start_ind + ind]: + assert not "(r)" in lines[start_ind + ind] - assert "Total Runtime" in lines[-6] - assert "Total project wall time" in lines[-5] + assert "Total node runtime" in lines[-5] + assert "Total project wall time" in lines[-3] assert lines[-4] == "Total AUs spent: 6" @@ -253,12 +254,10 @@ def test_failed_run( "gaps_test_failed_run_j1 failed local high", "gaps_test_failed_run_j2 failed local unspecified", ] - if single_command: - start_ind = -12 - else: + if not single_command: expected_partial_lines += ["collect-run not submitted"] - start_ind = -14 + start_ind = -(len(lines) - 1) for ind, partial in enumerate(expected_partial_lines): assert all( string in lines[start_ind + ind] for string in partial.split() @@ -266,13 +265,34 @@ def test_failed_run( assert not "(r)" in lines[start_ind + ind] assert "Total AUs spent" not in lines[start_ind + ind] if single_command: - assert "Total project wall time" not in lines[start_ind + 10] + assert "Total project wall time" not in lines[start_ind + 12] if single_command: - assert "Total Runtime: 0:00:00" in lines[-4] + assert "Total node runtime: 0:00:00" in lines[-3] else: - assert "Total Runtime: 0:00:00" in lines[-5] - assert "Total project wall time" in lines[-4] + assert "Total node runtime: 0:00:00" in lines[-4] + assert "Total project wall time" in lines[-3] + + +def test_recursive_status(tmp_path, test_data_dir, cli_runner, monkeypatch): + """Test the status command for recursive directories.""" + + monkeypatch.setattr(psutil, "pid_exists", lambda *__: True, raising=True) + + status = status_command() + shutil.copytree(test_data_dir / "test_run", tmp_path / "test_run") + shutil.copytree( + test_data_dir / "test_failed_run", + tmp_path / "test_run" / "test_failed_run", + ) + run_dir = (tmp_path / "test_run").as_posix() + + result = cli_runner.invoke(status, [run_dir, "-r"]) + + lines = result.stdout.split("\n") + assert any(line == "test_run:" for line in lines) + assert any(line == "test_failed_run:" for line in lines) + assert len(lines) > 20 if __name__ == "__main__": From 
a3ad402ff3e16d09d5b63b1800081d4c7dd63606 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 19:01:32 -0600 Subject: [PATCH 56/61] Updated tests to be more robust against messages in result --- tests/cli/test_cli_status.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/cli/test_cli_status.py b/tests/cli/test_cli_status.py index de70c11f..e2365813 100644 --- a/tests/cli/test_cli_status.py +++ b/tests/cli/test_cli_status.py @@ -84,11 +84,11 @@ def test_status(test_data_dir, cli_runner, extra_args, monkeypatch): "gaps_test_run_j3 submitted local", "collect-run not submitted", ] - start_ind = -(len(lines) - 1) + start_ind = -21 for ind, partial in enumerate(expected_partial_lines): assert all( string in lines[start_ind + ind] for string in partial.split() - ), partial + ), f"{partial}, {lines[start_ind + ind:]}" assert "Total node runtime" in lines[-5] assert "Total project wall time" in lines[-3] @@ -162,11 +162,11 @@ def test_status_with_hardware_check( "collect-run not submitted", ] - start_ind = -(len(lines) - 1) + start_ind = -19 for ind, partial in enumerate(expected_partial_lines): assert all( string in lines[start_ind + ind] for string in partial.split() - ), partial + ), f"{partial}, {lines[start_ind + ind:]}" if "failed" in lines[start_ind + ind]: assert not "(r)" in lines[start_ind + ind] @@ -254,14 +254,17 @@ def test_failed_run( "gaps_test_failed_run_j1 failed local high", "gaps_test_failed_run_j2 failed local unspecified", ] - if not single_command: + + if single_command: + start_ind = -13 + else: + start_ind = -16 expected_partial_lines += ["collect-run not submitted"] - start_ind = -(len(lines) - 1) for ind, partial in enumerate(expected_partial_lines): assert all( string in lines[start_ind + ind] for string in partial.split() - ), partial + ), f"{partial}, {lines[start_ind + ind]:}" assert not "(r)" in lines[start_ind + ind] assert "Total AUs spent" not in lines[start_ind + ind] if single_command: From 5b805b1d341075517c0bd9c8dbb68b5c984de462 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 19:01:39 -0600 Subject: [PATCH 57/61] Bump version --- gaps/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gaps/version.py b/gaps/version.py index 5ffd5b6e..c6b69185 100644 --- a/gaps/version.py +++ b/gaps/version.py @@ -1,3 +1,3 @@ """GAPs Version Number. 
""" -__version__ = "0.3.4" +__version__ = "0.4.0" From 6966cfd86d48163c9760504647850db87265f261 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 21:58:50 -0600 Subject: [PATCH 58/61] Better str comparison in test --- tests/test_hpc.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_hpc.py b/tests/test_hpc.py index a88b07c4..d7dd7774 100644 --- a/tests/test_hpc.py +++ b/tests/test_hpc.py @@ -325,7 +325,9 @@ def test_submit(monkeypatch): assert not err out, err = submit(cmd_squeue, background=False, background_stdout=False) - assert out == SQUEUE_RAW.replace("\n", "\r\n") + out = out.replace("\n", "").replace("\r", "") + expected = SQUEUE_RAW.replace("\n", "").replace("\r", "") + assert out == expected assert not err with pytest.raises(OSError): From 8ccba102c9e0c02ef947053edcfeb0b116c1f9be Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 22:00:39 -0600 Subject: [PATCH 59/61] Install pytest with gaps --- .github/workflows/pr_rev_cli_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr_rev_cli_tests.yml b/.github/workflows/pr_rev_cli_tests.yml index ee0cbc9d..f3615af2 100644 --- a/.github/workflows/pr_rev_cli_tests.yml +++ b/.github/workflows/pr_rev_cli_tests.yml @@ -32,7 +32,7 @@ jobs: working-directory: ./gaps shell: bash -l {0} run: | - pip install -e . + pip install -e .[dev] - name: Run reV CLI tests working-directory: ./reV shell: bash -l {0} From da8ac58c137be8c26f144db736a4bf39d507436c Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 22:04:11 -0600 Subject: [PATCH 60/61] Increase wait limit for test --- tests/cli/test_cli.py | 178 +++++++++++++++++++++--------------------- 1 file changed, 89 insertions(+), 89 deletions(-) diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index ed648e29..1dc6fe0d 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -243,94 +243,94 @@ def preprocess_run_config(config, project_dir, out_dir): assert_message_was_logged("is complete.", "INFO") -# @pytest.mark.integration -# def test_cli_monitor( -# tmp_cwd, -# cli_runner, -# collect_pattern, -# manual_collect, -# runnable_script, -# assert_message_was_logged, -# ): -# """Integration test of `make_cli` with monitor""" - -# data_dir, file_pattern = collect_pattern - -# def preprocess_run_config(config, project_dir, out_dir): -# assert project_dir == out_dir -# config["dest_dir"] = str(project_dir) -# config["source_dir"] = str(data_dir) -# config["file_pattern"] = f"./{file_pattern}" -# return config - -# commands = [ -# CLICommandFromFunction( -# _copy_files, -# name="run", -# add_collect=True, -# split_keys=["project_points"], -# config_preprocessor=preprocess_run_config, -# ) -# ] - -# main = make_cli(commands) - -# assert not set(tmp_cwd.glob("*")) -# cli_runner.invoke(main, ["template-configs"]) -# files = set(tmp_cwd.glob("*")) -# assert len(files) == 3 -# for config_type in ["pipeline", "run", "collect_run"]: -# assert tmp_cwd / f"config_{config_type}.json" in files - -# pipe_config_fp = tmp_cwd / "config_pipeline.json" -# run_config_fp = tmp_cwd / "config_run.json" -# with open(run_config_fp, "r") as config_file: -# config = json.load(config_file) - -# assert config["project_points"] == CommandDocumentation.REQUIRED_TAG -# exec_control = config["execution_control"] -# assert exec_control["max_workers"] == CommandDocumentation.REQUIRED_TAG -# assert exec_control["nodes"] == 1 -# config["project_points"] = PROJECT_POINTS -# config["execution_control"]["option"] = 
"local" -# config["execution_control"]["max_workers"] = MAX_WORKERS - -# with open(run_config_fp, "w") as config_file: -# json.dump(config, config_file) - -# assert not set(tmp_cwd.glob(file_pattern)) -# assert tmp_cwd / "logs" not in set(tmp_cwd.glob("*")) -# assert tmp_cwd / "chunk_files" not in set(tmp_cwd.glob("*")) - -# cli_runner.invoke( -# main, ["pipeline", "-c", pipe_config_fp.as_posix(), "--monitor"] -# ) -# assert len(set((tmp_cwd / "logs").glob("*run*"))) == 2 -# assert len(set(tmp_cwd.glob(file_pattern))) == 1 -# assert tmp_cwd / "logs" in set(tmp_cwd.glob("*")) -# assert tmp_cwd / "chunk_files" in set(tmp_cwd.glob("*")) -# assert len(set((tmp_cwd / "chunk_files").glob(file_pattern))) == 4 - -# log_file = set((tmp_cwd / "logs").glob("*collect_run*")) -# assert len(log_file) == 1 - -# with open(log_file.pop(), "r") as log: -# assert "DEBUG" not in log.read() - -# h5_files = set(tmp_cwd.glob("*.h5")) -# assert len(h5_files) == 1 - -# with h5py.File(h5_files.pop(), "r") as collected_outputs: -# assert len(collected_outputs.keys()) == 5 -# assert "cf_mean" in collected_outputs -# assert "lcoe_fcr" in collected_outputs -# cf_profiles = collected_outputs["cf_profile"][...] - -# profiles = manual_collect(data_dir / file_pattern, "cf_profile") -# assert np.allclose(profiles, cf_profiles) - -# assert_message_was_logged("Pipeline job", "INFO") -# assert_message_was_logged("is complete.", "INFO") +@pytest.mark.integration +def test_cli_monitor( + tmp_cwd, + cli_runner, + collect_pattern, + manual_collect, + runnable_script, + assert_message_was_logged, +): + """Integration test of `make_cli` with monitor""" + + data_dir, file_pattern = collect_pattern + + def preprocess_run_config(config, project_dir, out_dir): + assert project_dir == out_dir + config["dest_dir"] = str(project_dir) + config["source_dir"] = str(data_dir) + config["file_pattern"] = f"./{file_pattern}" + return config + + commands = [ + CLICommandFromFunction( + _copy_files, + name="run", + add_collect=True, + split_keys=["project_points"], + config_preprocessor=preprocess_run_config, + ) + ] + + main = make_cli(commands) + + assert not set(tmp_cwd.glob("*")) + cli_runner.invoke(main, ["template-configs"]) + files = set(tmp_cwd.glob("*")) + assert len(files) == 3 + for config_type in ["pipeline", "run", "collect_run"]: + assert tmp_cwd / f"config_{config_type}.json" in files + + pipe_config_fp = tmp_cwd / "config_pipeline.json" + run_config_fp = tmp_cwd / "config_run.json" + with open(run_config_fp, "r") as config_file: + config = json.load(config_file) + + assert config["project_points"] == CommandDocumentation.REQUIRED_TAG + exec_control = config["execution_control"] + assert exec_control["max_workers"] == CommandDocumentation.REQUIRED_TAG + assert exec_control["nodes"] == 1 + config["project_points"] = PROJECT_POINTS + config["execution_control"]["option"] = "local" + config["execution_control"]["max_workers"] = MAX_WORKERS + + with open(run_config_fp, "w") as config_file: + json.dump(config, config_file) + + assert not set(tmp_cwd.glob(file_pattern)) + assert tmp_cwd / "logs" not in set(tmp_cwd.glob("*")) + assert tmp_cwd / "chunk_files" not in set(tmp_cwd.glob("*")) + + cli_runner.invoke( + main, ["pipeline", "-c", pipe_config_fp.as_posix(), "--monitor"] + ) + assert len(set((tmp_cwd / "logs").glob("*run*"))) == 2 + assert len(set(tmp_cwd.glob(file_pattern))) == 1 + assert tmp_cwd / "logs" in set(tmp_cwd.glob("*")) + assert tmp_cwd / "chunk_files" in set(tmp_cwd.glob("*")) + assert len(set((tmp_cwd / 
"chunk_files").glob(file_pattern))) == 4 + + log_file = set((tmp_cwd / "logs").glob("*collect_run*")) + assert len(log_file) == 1 + + with open(log_file.pop(), "r") as log: + assert "DEBUG" not in log.read() + + h5_files = set(tmp_cwd.glob("*.h5")) + assert len(h5_files) == 1 + + with h5py.File(h5_files.pop(), "r") as collected_outputs: + assert len(collected_outputs.keys()) == 5 + assert "cf_mean" in collected_outputs + assert "lcoe_fcr" in collected_outputs + cf_profiles = collected_outputs["cf_profile"][...] + + profiles = manual_collect(data_dir / file_pattern, "cf_profile") + assert np.allclose(profiles, cf_profiles) + + assert_message_was_logged("Pipeline job", "INFO") + assert_message_was_logged("is complete.", "INFO") @pytest.mark.skipif( @@ -406,7 +406,7 @@ def preprocess_run_config(config, project_dir, out_dir): if os.getpid() == status["monitor_pid"]: # Wait to die for __ in range(10): - time.sleep(10) + time.sleep(60) pytest.exit(0) assert ( From ce9fd126b43d7a1f80a8e9ef2a109b02c8e7f331 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 22:09:32 -0600 Subject: [PATCH 61/61] Increase wait time in tests again --- tests/cli/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 1dc6fe0d..7f2fabb9 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -416,8 +416,8 @@ def preprocess_run_config(config, project_dir, out_dir): != StatusOption.SUCCESSFUL ) - for __ in range(6): - time.sleep(10) + for __ in range(10): + time.sleep(60) collect_status = Status.retrieve_job_status( tmp_cwd, "collect-run", f"{tmp_cwd.name}_collect_run" )