From d78df6d4f597037f1942f9e8094ce9ac26f7557f Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 22 Jun 2023 17:01:40 -0600 Subject: [PATCH 01/61] Moved and updated `GAPS_SUPPLIED_ARGS` --- gaps/cli/command.py | 13 +----- gaps/cli/config.py | 12 ++++++ tests/cli/test_cli_config.py | 84 +++++++++++++++++++++++++++++++++--- 3 files changed, 90 insertions(+), 19 deletions(-) diff --git a/gaps/cli/command.py b/gaps/cli/command.py index bfde8db6..825c1e7e 100644 --- a/gaps/cli/command.py +++ b/gaps/cli/command.py @@ -9,22 +9,11 @@ import click +from gaps.cli.config import GAPS_SUPPLIED_ARGS from gaps.cli.documentation import CommandDocumentation from gaps.cli.preprocessing import split_project_points_into_ranges -GAPS_SUPPLIED_ARGS = { - "tag", - "command_name", - "config_file", - "project_dir", - "job_name", - "out_dir", - "out_fpath", - "config", -} - - class AbstractBaseCLICommandConfiguration(ABC): """Abstract Base CLI Command representation. diff --git a/gaps/cli/config.py b/gaps/cli/config.py index 8fbd79b1..2f556d06 100644 --- a/gaps/cli/config.py +++ b/gaps/cli/config.py @@ -35,6 +35,18 @@ ] TAG = "_j" MAX_AU_BEFORE_WARNING = 10_000 +GAPS_SUPPLIED_ARGS = { + "tag", + "command_name", + "config_file", + "project_dir", + "job_name", + "out_dir", + "out_fpath", + "config", + "log_directory", + "verbose", +} class _FromConfig: diff --git a/tests/cli/test_cli_config.py b/tests/cli/test_cli_config.py index fbb8b20f..81080f2d 100644 --- a/tests/cli/test_cli_config.py +++ b/tests/cli/test_cli_config.py @@ -363,13 +363,6 @@ def test_warning_about_au_usage( ): """Test the `run` function calls `_kickoff_hpc_job` for multiple nodes.""" - # def assert_message_was_logged(caplog): - # """Assert that a particular (partial) message was logged.""" - # caplog.clear() - - # def assert_message(msg, log_level=None, clear_records=False): - # """Assert that a message was logged.""" - # assert caplog.records tmp_path = test_ctx.obj["TMP_PATH"] if test_class: @@ -759,5 +752,82 @@ def test_func(project_points, input1, input2, tag): assert all(key not in status["run"]["test"] for key in exclude) +@pytest.mark.parametrize("test_extras", [False, True]) +@pytest.mark.parametrize("test_class", [False, True]) +def test_args_passed_to_pre_processor( + tmp_path, test_ctx, test_extras, test_class, runnable_script +): + """Test that correct arguments are passed to the pre-processor.""" + + input_config = { + "execution_control": {"max_workers": 100}, + "input1": 1, + "a_value": 5, + "a_multiplier": 2, + "input2": 7, + "_input2": 8, + "input3": None, + "project_points": [0, 1, 2], + } + config_fp = tmp_path / "config.json" + + if test_extras: + input_config["log_directory"] = str(tmp_path / "other_logs") + input_config["log_level"] = "DEBUG" + + with open(config_fp, "w") as config_file: + json.dump(input_config, config_file) + + def pre_processing( + config, + a_value, + a_multiplier, + command_name, + config_file, + project_dir, + job_name, + out_dir, + out_fpath, + log_directory, + verbose, + ): + assert a_value == 5 + assert a_multiplier == 2 + assert command_name == "run" + assert config_file == config_fp + assert project_dir == tmp_path + assert tmp_path.name in job_name + assert "run" in job_name + assert out_dir == tmp_path + assert out_fpath == out_dir / job_name + if test_extras: + assert log_directory == tmp_path / "other_logs" + assert verbose + else: + assert config == input_config + assert log_directory == tmp_path / "logs" + assert not verbose + + return config + + if test_class: + command_config = 
CLICommandFromClass( + TestCommand, + "run", + split_keys={"project_points", "input3"}, + config_preprocessor=pre_processing, + ) + else: + command_config = CLICommandFromFunction( + _testing_function, + name="run", + split_keys={"project_points", "input3"}, + config_preprocessor=pre_processing, + ) + + with pytest.warns(gapsWarning) as warning_info: + from_config(config_fp, command_config) + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) From cd4b5e40a9b35f55be49ff260e98c19106c73543 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 22 Jun 2023 17:04:14 -0600 Subject: [PATCH 02/61] Logging parameters now passed to preprocessor --- gaps/cli/config.py | 17 ++++++++++------- tests/cli/test_cli_command.py | 10 +++------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/gaps/cli/config.py b/gaps/cli/config.py index 2f556d06..d7ef3539 100644 --- a/gaps/cli/config.py +++ b/gaps/cli/config.py @@ -99,11 +99,11 @@ def enable_logging(self): self.log_directory = self.config.pop( "log_directory", (self.project_dir / "logs").as_posix() ) - self.verbose = ( - self.config.pop("log_level", "INFO") == "DEBUG" - or self.ctx.obj.get("VERBOSE", False) - ) - pipe_log_file = Path(self.log_directory) / f"{self.job_name}.log" + self.log_directory = Path(self.log_directory) + self.verbose = self.config.pop( + "log_level", "INFO" + ) == "DEBUG" or self.ctx.obj.get("VERBOSE", False) + pipe_log_file = self.log_directory / f"{self.job_name}.log" init_logger( stream=self.ctx.obj.get("LOG_STREAM", True), level="DEBUG" if self.verbose else "INFO", @@ -127,6 +127,9 @@ def preprocess_config(self): "project_dir": self.project_dir, "job_name": self.job_name, "out_dir": self.project_dir, + "out_fpath": self.project_dir / self.job_name, + "log_directory": self.log_directory, + "verbose": self.verbose, } extra_preprocessor_kwargs = { k: self.config[k] @@ -155,7 +158,7 @@ def set_exec_kwargs(self): self.exec_kwargs = { "option": "local", "sh_script": "", - "stdout_path": (Path(self.log_directory) / "stdout").as_posix(), + "stdout_path": (self.log_directory / "stdout").as_posix(), } self.exec_kwargs.update(self.config.get("execution_control", {})) @@ -177,7 +180,7 @@ def set_logging_options(self): """Assemble the logging options dictionary.""" self.logging_options = { "name": self.job_name, - "log_directory": self.log_directory, + "log_directory": self.log_directory.as_posix(), "verbose": self.verbose, "node": self.exec_kwargs.get("option", "local") != "local", } diff --git a/tests/cli/test_cli_command.py b/tests/cli/test_cli_command.py index 7b412ce2..3b0522e7 100644 --- a/tests/cli/test_cli_command.py +++ b/tests/cli/test_cli_command.py @@ -7,11 +7,8 @@ import click import pytest -from gaps.cli.command import ( - CLICommandFromFunction, - GAPS_SUPPLIED_ARGS, - _WrappedCommand, -) +from gaps.cli.command import CLICommandFromFunction, _WrappedCommand +from gaps.cli.config import GAPS_SUPPLIED_ARGS def test_cli_command_configuration(): @@ -29,8 +26,7 @@ def _test_func(): assert len(ccc.documentation.signatures) == 2 assert not ccc.is_split_spatially assert all( - param in ccc.documentation.skip_params - for param in GAPS_SUPPLIED_ARGS + param in ccc.documentation.skip_params for param in GAPS_SUPPLIED_ARGS ) def _test_preprocessor(config, name, _a_default=3): From 157410cfead709045618644eb61616617eea29b7 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 22 Jun 2023 17:04:28 -0600 Subject: [PATCH 03/61] Updated docstrings --- gaps/cli/command.py | 81 
++++++++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/gaps/cli/command.py b/gaps/cli/command.py index 825c1e7e..4d1417af 100644 --- a/gaps/cli/command.py +++ b/gaps/cli/command.py @@ -143,14 +143,13 @@ def __init__( convenience purposes. This argument combines the ``out_dir`` with ``job_name`` to yield a unique output filepath for a given node. Note that the - output filename will contain the tag if it is - "requested" from a node function, but *WILL NOT* - contain a tag if requested from a config - pre-processing function, since the number of split - nodes have not been determined at that point. - Also note that this string *WILL NOT* contain a - file-ending, so that will have to be added by the - node function. + output filename will contain the tag. Also note that + this string *WILL NOT* contain a file-ending, so + that will have to be added by the node function. + max_workers : int + Max number of workers to run per node. This option + will be requested from the user within the + "execution_control" dictionary in the config file. If your function is capable of multiprocessing, you should also include ``max_workers`` in the function signature. @@ -232,43 +231,40 @@ def __init__( can also "request" the following arguments by including them in the function signature: - tag : str - Short string unique to this job run that can be used - to generate unique output filenames, thereby - avoiding clashing output files with jobs on other - nodes. This string contains a leading underscore, - so the file name can easily be generated: - ``f"{out_file_name}{tag}.{extension}"``. command_name : str Name of the command being run. This is equivalent to the ``name`` input above. - config_file : str + config_file : Path Path to the configuration file specified by the user. - project_dir : str + project_dir : Path Path to the project directory (parent directory of the configuration file). job_name : str Name of the job being run. This is typically a combination of the project directory and the command name. - out_dir : str + out_dir : Path Path to output directory - typically equivalent to the project directory. - out_fpath : str + out_fpath : Path Suggested path to output file. You are not required to use this argument - it is provided purely for convenience purposes. This argument combines the ``out_dir`` with ``job_name`` to yield a unique output filepath for a given node. Note that the - output filename will contain the tag if it is - "requested" from a node function, but *WILL NOT* - contain a tag if requested from a config - pre-processing function, since the number of split - nodes have not been determined at that point. + output filename *WILL NOT* contain the tag, since + the number of split nodes have not been determined + when the config pre-processing function is called. Also note that this string *WILL NOT* contain a file-ending, so that will have to be added by the node function. + log_directory : Path + Path to log output directory (defaults to + project_dir / "logs"). + verbose : bool + Flag indicating wether the user has selected a DEBUG + verbosity level for logs. These inputs will be provided by GAPs and *will not* be displayed to users in the template configuration files or @@ -405,10 +401,10 @@ def __init__( command_name : str Name of the command being run. This is equivalent to the ``name`` input argument. - config_file : Path + config_file : str Path to the configuration file specified by the user. 
- project_dir : Path + project_dir : str Path to the project directory (parent directory of the configuration file). job_name : str @@ -418,9 +414,22 @@ def __init__( will not be included if you request this argument in a config preprocessing function, as the execution has not been split into multiple jobs by that point. - out_dir : Path + out_dir : str Path to output directory - typically equivalent to the project directory. + out_fpath : str + Suggested path to output file. You are not required + to use this argument - it is provided purely for + convenience purposes. This argument combines the + ``out_dir`` with ``job_name`` to yield a unique + output filepath for a given node. Note that the + output filename will contain the tag. Also note that + this string *WILL NOT* contain a file-ending, so + that will have to be added by the node function. + max_workers : int + Max number of workers to run per node. This option + will be requested from the user within the + "execution_control" dictionary in the config file. If your function is capable of multiprocessing, you should also include ``max_workers`` in the function signature. @@ -517,6 +526,24 @@ def __init__( out_dir : Path Path to output directory - typically equivalent to the project directory. + out_fpath : Path + Suggested path to output file. You are not required + to use this argument - it is provided purely for + convenience purposes. This argument combines the + ``out_dir`` with ``job_name`` to yield a unique + output filepath for a given node. Note that the + output filename *WILL NOT* contain the tag, since + the number of split nodes have not been determined + when the config pre-processing function is called. + Also note that this string *WILL NOT* contain a + file-ending, so that will have to be added by the + node function. + log_directory : Path + Path to log output directory (defaults to + project_dir / "logs"). + verbose : bool + Flag indicating wether the user has selected a DEBUG + verbosity level for logs. 
These inputs will be provided by GAPs and *will not* be displayed to users in the template configuration files or From b75e0c0408ac9100e0f5380fb5ad7b9887b9b707 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 22 Jun 2023 17:25:03 -0600 Subject: [PATCH 04/61] Added info logging --- gaps/cli/config.py | 14 +++++++++++++- tests/cli/test_cli_config.py | 12 +++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/gaps/cli/config.py b/gaps/cli/config.py index d7ef3539..437926ec 100644 --- a/gaps/cli/config.py +++ b/gaps/cli/config.py @@ -113,7 +113,7 @@ def enable_logging(self): def validate_config(self): """Validate the user input config file.""" - logger.debug("Validating %r", self.config_file) + logger.debug("Validating %r", str(self.config_file)) _validate_config(self.config, self.command_config.documentation) return self @@ -205,6 +205,17 @@ def prepare_context(self): self.ctx.obj["OUT_DIR"] = self.project_dir return self + def log_job_info(self): + """Log information about job submission""" + logger.info( + "Running %s from config file: %r", + self.command_name, + str(self.config_file), + ) + logger.info("Target output directory: %r", str(self.project_dir)) + logger.info("Target logging directory: %r", str(self.log_directory)) + return self + def kickoff_jobs(self): """Kickoff jobs across nodes based on config and run function.""" keys_to_run, lists_to_run = self._keys_and_lists_to_run() @@ -315,6 +326,7 @@ def run(self): .set_logging_options() .set_exclude_from_status() .prepare_context() + .log_job_info() .kickoff_jobs() ) diff --git a/tests/cli/test_cli_config.py b/tests/cli/test_cli_config.py index 81080f2d..0b363b31 100644 --- a/tests/cli/test_cli_config.py +++ b/tests/cli/test_cli_config.py @@ -202,7 +202,7 @@ def runnable_script(): ) @pytest.mark.parametrize("test_class", [False, True]) def test_run_local( - test_ctx, extra_input, none_list, runnable_script, test_class + test_ctx, extra_input, none_list, runnable_script, test_class, caplog ): """Test the `run` function locally.""" @@ -248,6 +248,15 @@ def pre_processing(config, a_value, a_multiplier): assert "input2" in warning_info[0].message.args[0] assert "_input2" in warning_info[0].message.args[0] + expected_log_starts = [ + "Running run from config file: '", + "Target output directory: '", + "Target logging directory: '", + ] + for expected in expected_log_starts: + assert any(expected in record.message for record in caplog.records) + assert not any("Path(" in record.message for record in caplog.records) + if "max_workers" in extra_input: expected_message = ( "Found key 'max_workers' outside of 'execution_control'. 
" @@ -778,6 +787,7 @@ def test_args_passed_to_pre_processor( with open(config_fp, "w") as config_file: json.dump(input_config, config_file) + # pylint: disable=too-many-arguments def pre_processing( config, a_value, From 7fdc24b7e4066938be4f3ab62f04d1e6ec62a078 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 22 Jun 2023 17:28:18 -0600 Subject: [PATCH 05/61] Log command name --- gaps/cli/execution.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gaps/cli/execution.py b/gaps/cli/execution.py index 537ddee0..fa33ad46 100644 --- a/gaps/cli/execution.py +++ b/gaps/cli/execution.py @@ -143,7 +143,8 @@ def _kickoff_hpc_job(ctx, cmd, **kwargs): return name = ctx.obj["NAME"] - logger.info("Running on HPC with job name %r.", name) + command = ctx.obj["COMMAND_NAME"] + logger.info("Running %s on HPC with job name %r.", command, name) logger.debug("Submitting the following command:\n%s", cmd) out = ctx.obj["MANAGER"].submit(name, cmd=cmd, **kwargs)[0] id_msg = f" (Job ID #{out})" if out else "" From e4a15a61aa27931df73a85e11833c8ae480e1f76 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 22 Jun 2023 17:39:08 -0600 Subject: [PATCH 06/61] More thorough test for `_should_run` function --- gaps/cli/execution.py | 5 ++- tests/cli/test_cli_execution.py | 73 ++++++++++++++++++--------------- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/gaps/cli/execution.py b/gaps/cli/execution.py index fa33ad46..d89ebd22 100644 --- a/gaps/cli/execution.py +++ b/gaps/cli/execution.py @@ -176,7 +176,10 @@ def _should_run(ctx): job_name=name, subprocess_manager=ctx.obj.get("MANAGER"), ) - if status == StatusOption.SUCCESSFUL: + if status == StatusOption.NOT_SUBMITTED: + return True + + if status in {StatusOption.SUCCESSFUL, StatusOption.COMPLETE}: msg = ( f"Job {name!r} is successful in status json found in {out_dir!r}, " f"not re-running." 
diff --git a/tests/cli/test_cli_execution.py b/tests/cli/test_cli_execution.py index 2cdaf104..aad4be76 100644 --- a/tests/cli/test_cli_execution.py +++ b/tests/cli/test_cli_execution.py @@ -31,40 +31,45 @@ def test_should_run(test_ctx, caplog, assert_message_was_logged): assert _should_run(test_ctx) assert not caplog.records - Status.make_single_job_file( - test_ctx.obj["OUT_DIR"], - test_ctx.obj["COMMAND_NAME"], - test_ctx.obj["NAME"], - {StatusField.JOB_STATUS: StatusOption.FAILED}, - ) - - assert _should_run(test_ctx) - assert not caplog.records - - Status.make_single_job_file( - test_ctx.obj["OUT_DIR"], - test_ctx.obj["COMMAND_NAME"], - test_ctx.obj["NAME"], - {StatusField.JOB_STATUS: StatusOption.RUNNING}, - ) - - assert not _should_run(test_ctx) - assert_message_was_logged(test_ctx.obj["NAME"], "INFO") - assert_message_was_logged("was found with status", "INFO") - assert_message_was_logged("running", "INFO", clear_records=True) - assert not caplog.records - - Status.make_single_job_file( - test_ctx.obj["OUT_DIR"], - test_ctx.obj["COMMAND_NAME"], - test_ctx.obj["NAME"], - {StatusField.JOB_STATUS: StatusOption.SUCCESSFUL}, - ) - - assert not _should_run(test_ctx) - assert_message_was_logged(test_ctx.obj["NAME"], "INFO") - assert_message_was_logged("is successful", "INFO") - assert_message_was_logged("not re-running", "INFO") + for option in [StatusOption.NOT_SUBMITTED, StatusOption.FAILED]: + Status.make_single_job_file( + test_ctx.obj["OUT_DIR"], + test_ctx.obj["COMMAND_NAME"], + test_ctx.obj["NAME"], + {StatusField.JOB_STATUS: option}, + ) + + assert _should_run(test_ctx) + assert not caplog.records + + for option in [StatusOption.SUBMITTED, StatusOption.RUNNING]: + Status.make_single_job_file( + test_ctx.obj["OUT_DIR"], + test_ctx.obj["COMMAND_NAME"], + test_ctx.obj["NAME"], + {StatusField.JOB_STATUS: option}, + ) + + assert not _should_run(test_ctx) + assert_message_was_logged(test_ctx.obj["NAME"], "INFO") + assert_message_was_logged( + "was found with status", "INFO", clear_records=True + ) + assert not caplog.records + + for option in [StatusOption.SUCCESSFUL, StatusOption.COMPLETE]: + Status.make_single_job_file( + test_ctx.obj["OUT_DIR"], + test_ctx.obj["COMMAND_NAME"], + test_ctx.obj["NAME"], + {StatusField.JOB_STATUS: option}, + ) + + assert not _should_run(test_ctx) + assert_message_was_logged(test_ctx.obj["NAME"], "INFO") + assert_message_was_logged("is successful", "INFO") + assert_message_was_logged("not re-running", "INFO", clear_records=True) + assert not caplog.records def test_kickoff_job_local_basic(test_ctx, assert_message_was_logged): From 0928a4a04e573ddd08d80b0a05d7bbfd13cea4c1 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 22 Jun 2023 18:05:34 -0600 Subject: [PATCH 07/61] Add missing command name in log --- gaps/cli/execution.py | 5 +++-- tests/cli/test_cli_execution.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/gaps/cli/execution.py b/gaps/cli/execution.py index d89ebd22..4ad71b5c 100644 --- a/gaps/cli/execution.py +++ b/gaps/cli/execution.py @@ -114,7 +114,8 @@ def _kickoff_local_job(ctx, cmd): return name = ctx.obj["NAME"] - logger.info("Running locally with job name %r.", name) + command = ctx.obj["COMMAND_NAME"] + logger.info("Running %r locally with job name %r.", command, name) logger.debug("Submitting the following command:\n%s", cmd) Status.mark_job_as_submitted( ctx.obj["OUT_DIR"], @@ -144,7 +145,7 @@ def _kickoff_hpc_job(ctx, cmd, **kwargs): name = ctx.obj["NAME"] command = ctx.obj["COMMAND_NAME"] - 
logger.info("Running %s on HPC with job name %r.", command, name) + logger.info("Running %r on HPC with job name %r.", command, name) logger.debug("Submitting the following command:\n%s", cmd) out = ctx.obj["MANAGER"].submit(name, cmd=cmd, **kwargs)[0] id_msg = f" (Job ID #{out})" if out else "" diff --git a/tests/cli/test_cli_execution.py b/tests/cli/test_cli_execution.py index aad4be76..06393709 100644 --- a/tests/cli/test_cli_execution.py +++ b/tests/cli/test_cli_execution.py @@ -101,7 +101,7 @@ def test_kickoff_job_local(test_ctx, assert_message_was_logged): exec_kwargs = {"option": "local"} cmd = "python -c \"import warnings; warnings.warn('a test warning')\"" kickoff_job(test_ctx, cmd, exec_kwargs) - assert_message_was_logged("Running locally with job name", "INFO") + assert_message_was_logged("Running 'run' locally with job name", "INFO") assert_message_was_logged(test_ctx.obj["NAME"], "INFO") assert_message_was_logged(cmd, "DEBUG") assert_message_was_logged("Subprocess received stderr") @@ -160,7 +160,7 @@ def _test_submit(cmd): "Found extra keys in 'execution_control'! ", "WARNING" ) assert_message_was_logged("dne_arg", "WARNING") - assert_message_was_logged("Running on HPC with job name", "INFO") + assert_message_was_logged("Running 'run' on HPC with job name", "INFO") assert_message_was_logged(test_ctx.obj["NAME"], "INFO") assert_message_was_logged("Kicked off job") assert_message_was_logged("(Job ID #9999)", clear_records=True) From df31e7ea264c71f6573b8e295f89157230950397 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 10 Jul 2023 14:57:00 -0600 Subject: [PATCH 08/61] Added logging to status updates --- gaps/status.py | 17 ++++++++++++++++- tests/test_status.py | 9 +++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/gaps/status.py b/gaps/status.py index 36d9aa17..2dfa8860 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -1,9 +1,10 @@ # -*- coding: utf-8 -*- """gaps Job status manager. """ import json -import logging import time import shutil +import pprint +import logging import datetime as dt from pathlib import Path from copy import deepcopy @@ -647,6 +648,11 @@ def __init__(self, directory, command, job_name, job_attrs): self.out_file = None def __enter__(self): + logger.debug( + "Received job attributes: %s", + pprint.pformat(self.job_attrs, indent=4), + ) + self.start_time = dt.datetime.now() self.job_attrs.update( { @@ -680,10 +686,19 @@ def __exit__(self, exc_type, exc, traceback): StatusField.JOB_STATUS: StatusOption.SUCCESSFUL, } ) + logger.info( + "Command %r complete. Time elapsed: %s. 
" + "Target output file: %r", + self.command, + time_elapsed, + self.out_file, + ) else: self.job_attrs.update( {StatusField.JOB_STATUS: StatusOption.FAILED} ) + logger.info("Command %r failed in %s", self.command, time_elapsed) + Status.make_single_job_file( self.directory, command=self.command, diff --git a/tests/test_status.py b/tests/test_status.py index 847cac3d..244f3309 100644 --- a/tests/test_status.py +++ b/tests/test_status.py @@ -567,7 +567,7 @@ def test_parse_command_status(tmp_path): assert not Status.parse_command_status(tmp_path, "generation", key="dne") -def test_status_updates(tmp_path): +def test_status_updates(tmp_path, assert_message_was_logged): """Test `StatusUpdates` context manager""" assert not list(tmp_path.glob("*")) @@ -591,6 +591,9 @@ def test_status_updates(tmp_path): stu.out_file = "my_test_file.h5" + assert_message_was_logged("Command 'generation' complete.", "INFO") + assert_message_was_logged("Target output file: 'my_test_file.h5'", "INFO") + job_files = list(tmp_path.glob("*")) assert len(job_files) == 1 with open(job_files[0]) as job_status: @@ -605,7 +608,7 @@ def test_status_updates(tmp_path): assert test_attrs == TEST_1_ATTRS_1 -def test_status_for_failed_job(tmp_path): +def test_status_for_failed_job(tmp_path, assert_message_was_logged): """Test `StatusUpdates` context manager for a failed job""" class _TestError(ValueError): @@ -629,6 +632,8 @@ class _TestError(ValueError): except _TestError: pass + assert_message_was_logged("Command 'generation' failed in", "INFO") + status = Status(tmp_path).update_from_all_job_files() status = status["generation"]["test0"] From cd2854e15fe374184d6bb0e7aa2a89dbbfc9f1a6 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 10 Jul 2023 15:13:10 -0600 Subject: [PATCH 09/61] Updated order of logging statements --- gaps/cli/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gaps/cli/config.py b/gaps/cli/config.py index 437926ec..2ea7ebbc 100644 --- a/gaps/cli/config.py +++ b/gaps/cli/config.py @@ -321,12 +321,12 @@ def run(self): return ( self.enable_logging() .validate_config() + .log_job_info() .preprocess_config() .set_exec_kwargs() .set_logging_options() .set_exclude_from_status() .prepare_context() - .log_job_info() .kickoff_jobs() ) From 386b999009a26d3dc2e1d50ae79538759de26762 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 11 Jul 2023 11:28:25 -0600 Subject: [PATCH 10/61] Added to QOS documentation --- gaps/cli/documentation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 0c4c333a..6d21b9a6 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -210,7 +210,10 @@ tp use as well as the base AU cost. :allocation: (str) HPC project (allocation) handle. :walltime: (int) Node walltime request in hours. - :qos: (str) Quality-of-service specifier. By default, ``"normal"``. + :qos: (str) Quality-of-service specifier. On Eagle or Kestrel, + this should be one of {{'standby', 'normal', 'high'}}. + Note that 'high' priority doubles the AU cost. By default, + ``"normal"``. :memory: (int, optional) Node memory request in GB. Default is not to specify.{n}{mw} :queue: (str, optional; PBS ONLY) HPC queue to submit job to. 
From 74cc127c4d8967835da5abf802fe70e5006bd66d Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Jul 2023 18:28:36 -0600 Subject: [PATCH 11/61] Minor docstring updates --- gaps/cli/preprocessing.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/gaps/cli/preprocessing.py b/gaps/cli/preprocessing.py index 1ba2cd94..dc5c4901 100644 --- a/gaps/cli/preprocessing.py +++ b/gaps/cli/preprocessing.py @@ -71,13 +71,13 @@ def preprocess_collect_config( out filepath input from the user. command_name : str Name of the command being run. This is used to parse the - pipeline status for output files if "collect_pattern"="PIPELINE" - in the input `config`. + pipeline status for output files if + ``collect_pattern="PIPELINE"`` in the input `config`. collect_pattern : str | list | dict, optional - Unix-style /filepath/pattern*.h5 representing the files to be - collected into a single output HDF5 file. If no output file path - is specified (i.e. this input is a single pattern or a list of - patterns), the output file path will be inferred from the + Unix-style ``/filepath/pattern*.h5`` representing the files to + be collected into a single output HDF5 file. If no output file + path is specified (i.e. this input is a single pattern or a list + of patterns), the output file path will be inferred from the pattern itself (specifically, the wildcard will be removed and the result will be the output file path). If a list of patterns is provided, each pattern will be collected into a @@ -86,10 +86,10 @@ def preprocess_collect_config( output file (including the filename itself; relative paths are allowed) and the values are patterns representing the files that should be collected into the output file. If running a collect - job as part of a pipeline, this input can be set to "PIPELINE", - which will parse the output of the previous step and generate - the input file pattern and output file name automatically. - By default, ``"PIPELINE"``. + job as part of a pipeline, this input can be set to + ``"PIPELINE"``, which will parse the output of the previous step + and generate the input file pattern and output file name + automatically. By default, ``"PIPELINE"``. Returns ------- From 7eae807f49a2f732c4c2122624a5fefc212c5a15 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Fri, 21 Jul 2023 10:48:41 -0600 Subject: [PATCH 12/61] Update docstrings --- gaps/cli/documentation.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 6d21b9a6..bce85410 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -214,22 +214,24 @@ this should be one of {{'standby', 'normal', 'high'}}. Note that 'high' priority doubles the AU cost. By default, ``"normal"``. - :memory: (int, optional) Node memory request in GB. Default is not to - specify.{n}{mw} + :memory: (int, optional) Node memory request in GB. By default, + ``None``, which does not specify a memory limit.{n}{mw} :queue: (str, optional; PBS ONLY) HPC queue to submit job to. Examples include: 'debug', 'short', 'batch', 'batch-h', - 'long', etc. By default, `None`, which uses `test_queue`. + 'long', etc. By default, ``None``, which uses "test_queue". :feature: (str, optional) Additional flags for SLURM job - (e.g. "-p debug"). Default is not to specify. + (e.g. "-p debug"). By default, ``None``, which does not + specify any additional flags. :conda_env: (str, optional) Name of conda environment to activate. 
- Default is not to load any environments. - :module: (str, optional) Module to load. Default is not to load any - modules. + By default, ``None``, which does not load any environments. + :module: (str, optional) Module to load. By default, ``None``, which + does not load any modules. :sh_script: (str, optional) Extra shell script to run before - command call. Default is not to run any scripts. + command call. By default, ``None``, which does not run any + scripts. - Only the "option" input is required for local execution. For - execution on the HPC, the allocation and walltime are also + Only the `option` key is required for local execution. For + execution on the HPC, the `allocation` and `walltime` keys are also required. All other options are populated with default values, as seen above. log_directory : str @@ -247,8 +249,7 @@ "\n :nodes: (int, optional) Number of nodes to split the project " "\n points across. Note that the total number of requested " "\n nodes for a job may be larger than this value if the " - "\n command splits across other inputs (e.g. analysis " - "\n years) Default is 1." + "\n command splits across other inputs. Default is ``1``." ) MW_DOC = "\n :max_workers: ({type}) {desc}" From 5fbe55b9000cc0cc34701c767e22ee333337b247 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Fri, 21 Jul 2023 13:59:09 -0600 Subject: [PATCH 13/61] Allow extra args to be added to exec control --- gaps/cli/config.py | 44 +++--- gaps/cli/documentation.py | 141 +++++++++++------ gaps/cli/execution.py | 3 +- tests/cli/test_cli_config.py | 13 +- tests/cli/test_cli_documentation.py | 224 +++++++++++++++++++++------- 5 files changed, 309 insertions(+), 116 deletions(-) diff --git a/gaps/cli/config.py b/gaps/cli/config.py index 2ea7ebbc..d3b7f33c 100644 --- a/gaps/cli/config.py +++ b/gaps/cli/config.py @@ -19,6 +19,7 @@ from gaps.config import load_config from gaps.log import init_logger from gaps.cli.execution import kickoff_job +from gaps.cli.documentation import EXTRA_EXEC_PARAMS from gaps.exceptions import gapsKeyError from gaps.warnings import gapsWarning @@ -162,17 +163,21 @@ def set_exec_kwargs(self): } self.exec_kwargs.update(self.config.get("execution_control", {})) - if "max_workers" in self.config: - max_workers = self.config.pop("max_workers") + extra_params = set() + for extra_exec_param in EXTRA_EXEC_PARAMS: + if extra_exec_param in self.config: + extra_params.add(extra_exec_param) + param = self.config.pop(extra_exec_param) + self.exec_kwargs[extra_exec_param] = param + + if extra_params: msg = ( - "Found key 'max_workers' outside of 'execution_control'. " - f"Moving 'max_workers' value ({max_workers}) into " - "'execution_control' block. To silence this warning, " - "please specify 'max_workers' inside of the " - "'execution_control' block." + f"Found key(s) {extra_params} outside of 'execution_control'. " + "Moving these keys into 'execution_control' block. To " + "silence this warning, please specify these keys inside of " + "the 'execution_control' block." 
) warn(msg, gapsWarning) - self.exec_kwargs["max_workers"] = max_workers return self @@ -224,7 +229,10 @@ def kickoff_jobs(self): num_jobs_submit = len(jobs) self._warn_about_excessive_au_usage(num_jobs_submit) n_zfill = len(str(num_jobs_submit)) - max_workers_per_node = self.exec_kwargs.pop("max_workers", None) + extra_exec_args = {} + for param in EXTRA_EXEC_PARAMS: + if param in self.exec_kwargs: + extra_exec_args[param] = self.exec_kwargs.pop(param) for node_index, values in enumerate(jobs): if num_jobs_submit > 1: tag = f"{TAG}{str(node_index).zfill(n_zfill)}" @@ -242,12 +250,12 @@ def kickoff_jobs(self): "job_name": job_name, "out_dir": self.project_dir.as_posix(), "out_fpath": (self.project_dir / job_name).as_posix(), - "max_workers": max_workers_per_node, "run_method": getattr( self.command_config, "run_method", None ), } ) + node_specific_config.update(extra_exec_args) for key, val in zip(keys_to_run, values): if isinstance(key, str): @@ -349,8 +357,9 @@ def _ensure_required_args_exist(config, documentation): name for name in documentation.required_args if name not in config } - if documentation.max_workers_required: - missing = _mark_max_workers_missing_if_needed(config, missing) + missing = _mark_extra_exec_params_missing_if_needed( + documentation, config, missing + ) if any(missing): msg = ( @@ -360,12 +369,13 @@ def _ensure_required_args_exist(config, documentation): raise gapsKeyError(msg) -def _mark_max_workers_missing_if_needed(config, missing): - """Add max_workers as missing key if it is not in `execution_control.""" - +def _mark_extra_exec_params_missing_if_needed(documentation, config, missing): + """Add extra exec params as missing if not found in `execution_control.""" exec_control = config.get("execution_control", {}) - if "max_workers" not in config and "max_workers" not in exec_control: - missing |= {"max_workers"} + for param in EXTRA_EXEC_PARAMS: + if documentation.param_required(param): + if param not in config and param not in exec_control: + missing |= {param} return missing diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index bce85410..8ce8aa8a 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -3,6 +3,7 @@ CLI documentation utilities. """ from copy import deepcopy +from functools import lru_cache from inspect import signature, isclass from numpydoc.docscrape import NumpyDocString @@ -24,6 +25,24 @@ "sh_script": None, } +EXTRA_EXEC_PARAMS = { + "max_workers": "Maximum number of parallel workers run on each node.", + "sites_per_worker": "Number of sites to run in series on a worker.", + "mem_util_lim": ( + "Memory utilization limit (fractional). Must be a value between 0 " + "and 100. This input sets how much data will be stored in-memory " + "at any given time before flushing to disk." + ), + "timeout": ( + "Number of seconds to wait for parallel run iteration to complete " + "before early termination" + ), + "pool_size": ( + "Number of futures to submit to a single process pool for " + "parallel futures." + ), +} + CONFIG_TYPES = [ ConfigType.JSON, ConfigType.YAML, @@ -215,7 +234,7 @@ Note that 'high' priority doubles the AU cost. By default, ``"normal"``. :memory: (int, optional) Node memory request in GB. By default, - ``None``, which does not specify a memory limit.{n}{mw} + ``None``, which does not specify a memory limit.{n}{eep} :queue: (str, optional; PBS ONLY) HPC queue to submit job to. Examples include: 'debug', 'short', 'batch', 'batch-h', 'long', etc. By default, ``None``, which uses "test_queue". 
@@ -251,7 +270,7 @@ "\n nodes for a job may be larger than this value if the " "\n command splits across other inputs. Default is ``1``." ) -MW_DOC = "\n :max_workers: ({type}) {desc}" +EXTRA_EXEC_PARAM_DOC = "\n :{name}: ({type}) {desc}" class CommandDocumentation: @@ -294,7 +313,7 @@ def __init__(self, *functions, skip_params=None, is_split_spatially=False): p.name: p for doc in self.docs for p in doc["Parameters"] } self.skip_params = set() if skip_params is None else set(skip_params) - self.skip_params |= {"cls", "self", "max_workers"} + self.skip_params |= {"cls", "self"} | set(EXTRA_EXEC_PARAMS) self.is_split_spatially = is_split_spatially @property @@ -303,64 +322,61 @@ def default_exec_values(self): exec_vals = deepcopy(DEFAULT_EXEC_VALUES) if not self.is_split_spatially: exec_vals.pop("nodes", None) - if self.max_workers_in_func_signature: - exec_vals["max_workers"] = ( - self.REQUIRED_TAG - if self.max_workers_required - else self.max_workers_param.default - ) + for param in EXTRA_EXEC_PARAMS: + if self._param_in_func_signature(param): + exec_vals[param] = ( + self.REQUIRED_TAG + if self.param_required(param) + else self._param_value(param).default + ) return exec_vals @property def exec_control_doc(self): """str: Execution_control documentation.""" nodes_doc = NODES_DOC if self.is_split_spatially else "" - return EXEC_CONTROL_DOC.format(n=nodes_doc, mw=self._max_workers_doc) + return EXEC_CONTROL_DOC.format( + n=nodes_doc, eep=self._extra_exec_param_doc + ) @property - def _max_workers_doc(self): - """str: `MW_DOC` formatted with the info from the input func.""" - param = self.param_docs.get("max_workers") + def _extra_exec_param_doc(self): + """str: Docstring formatted with the info from the input func.""" + return "".join( + [ + self._format_extra_exec_param_doc(param_name) + for param_name in EXTRA_EXEC_PARAMS + ] + ) + + def _format_extra_exec_param_doc(self, param_name): + """Format extra exec control parameters""" + param = self.param_docs.get(param_name) try: - return MW_DOC.format(type=param.type, desc=" ".join(param.desc)) + return EXTRA_EXEC_PARAM_DOC.format( + name=param_name, type=param.type, desc=" ".join(param.desc) + ) except AttributeError: pass - if self.max_workers_in_func_signature: - return MW_DOC.format( - type=( - "(int)" if self.max_workers_required else "(int, optional)" - ), - desc=( - "Maximum number of parallel workers run on each node." - "Default is `None`, which uses all available cores." - ), + if self._param_in_func_signature(param_name): + if self.param_required(param_name): + param_type = "int" + default_text = "" + else: + default_val = self._param_value(param_name).default + if default_val is None: + param_type = "int, optional" + else: + param_type = f"{type(default_val).__name__}, optional" + default_text = f"By default, ``{default_val}``." 
+ return EXTRA_EXEC_PARAM_DOC.format( + name=param_name, + type=param_type, + desc=" ".join([EXTRA_EXEC_PARAMS[param_name], default_text]), ) - return "" - @property - def max_workers_in_func_signature(self): - """bool: `True` if "max_workers" is a param of the input function.""" - return self.max_workers_param is not None - - @property - def max_workers_param(self): - """bool: `True` if "max_workers" is a required parameter of `func`.""" - for sig in self.signatures: - for name in sig.parameters: - if name == "max_workers": - return sig.parameters["max_workers"] - return None - - @property - def max_workers_required(self): - """bool: `True` if "max_workers" is a required parameter of `func`.""" - param = self.max_workers_param - if param is None: - return False - return param.default is param.empty - @property def required_args(self): """set: Required parameters of the input function.""" @@ -462,6 +478,39 @@ def command_help(self, command_name): name=command_name, desc=self.extended_summary ) + @lru_cache(maxsize=16) + def _param_value(self, param): + """Extract parameter if it exists in signature""" + for sig in self.signatures: + for name in sig.parameters: + if name == param: + return sig.parameters[param] + return None + + @lru_cache(maxsize=16) + def _param_in_func_signature(self, param): + """`True` if `param` is a param of the input function.""" + return self._param_value(param) is not None + + @lru_cache(maxsize=16) + def param_required(self, param): + """Check wether a parameter is a required input for the run function. + + Parameters + ---------- + param : str + Name of parameter to check. + + Returns + ------- + bool + ``True`` if `param` is a required parameter of `func`. + """ + param = self._param_value(param) + if param is None: + return False + return param.default is param.empty + def _pipeline_command_help(pipeline_config): """Generate pipeline command help from a sample config.""" diff --git a/gaps/cli/execution.py b/gaps/cli/execution.py index 4ad71b5c..e3e53c6c 100644 --- a/gaps/cli/execution.py +++ b/gaps/cli/execution.py @@ -74,7 +74,8 @@ def _filter_exec_kwargs(kwargs, func, hardware_option): if extra_keys: msg = ( f"Found extra keys in 'execution_control'! The following " - f"inputs will be ignored: {extra_keys}" + f"inputs will be ignored: {extra_keys}. To silence this warning, " + "please remove the extra keys from the 'execution_control' block." ) warn(msg, gapsWarning) diff --git a/tests/cli/test_cli_config.py b/tests/cli/test_cli_config.py index 0b363b31..c2ba406a 100644 --- a/tests/cli/test_cli_config.py +++ b/tests/cli/test_cli_config.py @@ -42,6 +42,7 @@ def _testing_function( out_dir, out_fpath, max_workers, + pool_size=16, _input2=None, _z_0=None, ): @@ -71,6 +72,8 @@ def _testing_function( Internal GAPs out filepath. max_workers : int Max workers. + pool_size : int, optional + Pool size. By default, ``16``. _input2 : float, optional Secret input 2. By default, ``None``. _z_0 : str, optional @@ -85,6 +88,7 @@ def _testing_function( "_input2": _input2, "input3": input3, "max_workers": max_workers, + "pool_size": pool_size, "_z_0": _z_0, "out_fpath": out_fpath, "out_dir": out_dir, @@ -130,6 +134,7 @@ def run( out_dir, out_fpath, max_workers, + pool_size=16, _z_0=None, ): """Test run function for CLI around. @@ -154,6 +159,8 @@ def run( Internal GAPs out filepath. max_workers : int Max workers. + pool_size : int, optional + Pool size. By default, ``16``. _z_0 : str, optional Secret str. By default, ``None``. 
""" @@ -166,6 +173,7 @@ def run( "_input2": self._input2, "input3": self._input3, "max_workers": max_workers, + "pool_size": pool_size, "_z_0": _z_0, "out_fpath": out_fpath, "out_dir": out_dir, @@ -259,8 +267,8 @@ def pre_processing(config, a_value, a_multiplier): if "max_workers" in extra_input: expected_message = ( - "Found key 'max_workers' outside of 'execution_control'. " - "Moving 'max_workers' value (100) into 'execution_control' block." + "Found key(s) {'max_workers'} outside of 'execution_control'. " + "Moving these keys into 'execution_control' block." ) assert expected_message in warning_info[1].message.args[0] @@ -278,6 +286,7 @@ def pre_processing(config, a_value, a_multiplier): assert outputs["_input2"] == 10 assert outputs["input3"] is None assert outputs["max_workers"] == 100 + assert outputs["pool_size"] == 16 status = Status(tmp_path).update_from_all_job_files() assert len(status["run"]) == 1 diff --git a/tests/cli/test_cli_documentation.py b/tests/cli/test_cli_documentation.py index 09588114..16f7dc11 100644 --- a/tests/cli/test_cli_documentation.py +++ b/tests/cli/test_cli_documentation.py @@ -9,7 +9,11 @@ import pytest -from gaps.cli.documentation import DEFAULT_EXEC_VALUES, CommandDocumentation +from gaps.cli.documentation import ( + DEFAULT_EXEC_VALUES, + EXTRA_EXEC_PARAMS, + CommandDocumentation, +) def func_no_args(): @@ -26,87 +30,207 @@ def test_command_documentation_copies_skip_params(): skip_params_set = ["a"] doc = CommandDocumentation(func_no_args, skip_params=skip_params_set) assert skip_params_set == ["a"] - assert doc.skip_params == {"a", "cls", "self", "max_workers"} + assert doc.skip_params == {"a", "cls", "self"} | set(EXTRA_EXEC_PARAMS) -def test_command_documentation_max_workers(): - """Test the `CommandDocumentation` with and without `max_workers`.""" +def test_command_documentation_extra_exec_params(): + """Test the `CommandDocumentation` with extra exec params.""" - def func(max_workers): + def func(max_workers, sites_per_worker, mem_util_lim, timeout, pool_size): """A short description. Parameters ---------- max_workers : int Number of workers to run. + sites_per_worker : float + Number of sites to run. + mem_util_lim : str + A test documentation. + timeout : dict + A timeout value. + pool_size : list + A worker pool size. """ + expected_parameters = [ + "max_workers", + "sites_per_worker", + "mem_util_lim", + "timeout", + "pool_size", + ] + expected_types = ["(int)", "(float)", "(str)", "(dict)", "(list)"] + expected_decs = [ + "Number of workers to run.", + "Number of sites to run.", + "A test documentation.", + "A timeout value.", + "A worker pool size.", + ] + expected_iter = zip(expected_parameters, expected_types, expected_decs) + doc = CommandDocumentation(func) - assert doc.max_workers_in_func_signature - assert doc.max_workers_required - assert "Number of workers to run." 
in doc._max_workers_doc - assert "(int)" in doc._max_workers_doc - assert "max_workers" in doc.exec_control_doc + for param, p_type, p_doc in expected_iter: + assert doc._param_in_func_signature(param) + assert doc.param_required(param) + assert p_doc in doc._format_extra_exec_param_doc(param) + assert p_type in doc._format_extra_exec_param_doc(param) + assert param in doc.exec_control_doc - execution_control = doc.template_config["execution_control"] - assert execution_control["max_workers"] == doc.REQUIRED_TAG - assert doc.default_exec_values["max_workers"] == doc.REQUIRED_TAG + execution_control = doc.template_config["execution_control"] + assert execution_control[param] == doc.REQUIRED_TAG + assert doc.default_exec_values[param] == doc.REQUIRED_TAG - def func(max_workers): - """A short description.""" - doc = CommandDocumentation(func) - assert doc.max_workers_in_func_signature - assert doc.max_workers_required - assert doc._max_workers_doc - assert "(int)" in doc._max_workers_doc - assert "max_workers" in doc.exec_control_doc +def test_command_documentation_extra_exec_params_no_user_doc(): + """Test the `CommandDocumentation` with extra exec params no user doc.""" - execution_control = doc.template_config["execution_control"] - assert execution_control["max_workers"] == doc.REQUIRED_TAG - assert doc.default_exec_values["max_workers"] == doc.REQUIRED_TAG + def func(max_workers, sites_per_worker, mem_util_lim, timeout, pool_size): + """A short description.""" - def func(max_workers=2): + expected_parameters = [ + "max_workers", + "sites_per_worker", + "mem_util_lim", + "timeout", + "pool_size", + ] + doc = CommandDocumentation(func) + for param in expected_parameters: + assert doc._param_in_func_signature(param) + assert doc.param_required(param) + assert doc._format_extra_exec_param_doc(param) + assert "(int)" in doc._format_extra_exec_param_doc(param) + assert param in doc.exec_control_doc + + execution_control = doc.template_config["execution_control"] + assert execution_control["max_workers"] == doc.REQUIRED_TAG + assert doc.default_exec_values["max_workers"] == doc.REQUIRED_TAG + + +def test_command_documentation_extra_exec_params_user_defaults(): + """Test the `CommandDocumentation` with extra exec params and defaults.""" + + def func( + max_workers=2, + sites_per_worker=0.4, + mem_util_lim="test", + timeout=None, + pool_size=None, + ): """A short description. Parameters ---------- max_workers : int, optional - Number of workers to run. By default, `2`. + Number of workers to run. By default, ``2``. + sites_per_worker : float, optional + Number of sites to run. By default, ``0.4``. + mem_util_lim : str, optional + A test documentation. By default, ``"test"``. + timeout : dict, optional + A timeout value. By default, ``None``. + pool_size : list, optional + A worker pool size. By default, ``None``. 
""" + expected_parameters = [ + "max_workers", + "sites_per_worker", + "mem_util_lim", + "timeout", + "pool_size", + ] + expected_types = [ + "(int, optional)", + "(float, optional)", + "(str, optional)", + "(dict, optional)", + "(list, optional)", + ] + expected_decs = [ + "Number of workers to run.", + "Number of sites to run.", + "A test documentation.", + "A timeout value.", + "A worker pool size.", + ] + expected_value = [2, 0.4, "test", None, None] + expected_iter = zip( + expected_parameters, expected_types, expected_decs, expected_value + ) + doc = CommandDocumentation(func) - assert doc.max_workers_in_func_signature - assert not doc.max_workers_required - assert "Number of workers to run." in doc._max_workers_doc - assert "(int, optional)" in doc._max_workers_doc - assert "max_workers" in doc.exec_control_doc + for param, p_type, p_doc, p_val in expected_iter: + assert doc._param_in_func_signature(param) + assert not doc.param_required(param) + assert p_doc in doc._format_extra_exec_param_doc(param) + assert p_type in doc._format_extra_exec_param_doc(param) + assert param in doc.exec_control_doc + + execution_control = doc.template_config["execution_control"] + assert execution_control[param] == p_val + assert doc.default_exec_values[param] == p_val + + +def test_command_documentation_extra_exec_params_defaults_no_docs(): + """Test documentation with extra exec params, defaults, no doc.""" + + def func( + max_workers=2, + sites_per_worker=0.4, + mem_util_lim="test", + timeout=None, + pool_size=None, + ): + """A short description.""" - execution_control = doc.template_config["execution_control"] - assert execution_control["max_workers"] == 2 - assert doc.default_exec_values["max_workers"] == 2 + expected_parameters = [ + "max_workers", + "sites_per_worker", + "mem_util_lim", + "timeout", + "pool_size", + ] + expected_types = [ + "(int, optional)", + "(float, optional)", + "(str, optional)", + "(int, optional)", + "(int, optional)", + ] - def func(max_workers=None): - """A short description.""" + expected_value = [2, 0.4, "test", None, None] + expected_iter = zip(expected_parameters, expected_types, expected_value) doc = CommandDocumentation(func) - assert doc.max_workers_in_func_signature - assert not doc.max_workers_required - assert doc._max_workers_doc - assert "(int, optional)" in doc._max_workers_doc - assert "max_workers" in doc.exec_control_doc + for param, p_type, p_val in expected_iter: + assert doc._param_in_func_signature(param) + assert not doc.param_required(param) + assert doc._extra_exec_param_doc + assert p_type in doc._format_extra_exec_param_doc(param) + assert f"``{p_val}``" in doc._format_extra_exec_param_doc(param) + assert param in doc.exec_control_doc + + execution_control = doc.template_config["execution_control"] + assert execution_control[param] == p_val + assert doc.default_exec_values[param] == p_val + - execution_control = doc.template_config["execution_control"] - assert execution_control["max_workers"] is None - assert doc.default_exec_values["max_workers"] is None +def test_command_documentation_no_extra_exec_params(): + """Test documentation with no extra exec params""" doc = CommandDocumentation(func_no_args) - assert not doc.max_workers_in_func_signature - assert not doc.max_workers_required - assert not doc._max_workers_doc - assert "max_workers" not in doc.template_config["execution_control"] - assert "max_workers" not in doc.default_exec_values - assert "max_workers" not in doc.exec_control_doc + for param in EXTRA_EXEC_PARAMS: + assert not 
doc._param_in_func_signature(param) + assert not doc.param_required(param) + assert not doc._format_extra_exec_param_doc(param) + assert param not in doc.template_config["execution_control"] + assert param not in doc.default_exec_values + assert param not in doc.exec_control_doc + + assert not doc._extra_exec_param_doc def test_command_documentation_default_exec_values_and_doc(): From a252d449c73f01d9da156ea2853d43eeb7d89de1 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 22 Jul 2023 14:16:30 -0600 Subject: [PATCH 14/61] Add main CLI help docs --- gaps/cli/cli.py | 22 +++++++----- gaps/cli/documentation.py | 70 ++++++++++++++++++++++++++++++++++++--- tests/cli/test_cli.py | 9 +++++ 3 files changed, 88 insertions(+), 13 deletions(-) diff --git a/gaps/cli/cli.py b/gaps/cli/cli.py index b9508137..087e30ef 100644 --- a/gaps/cli/cli.py +++ b/gaps/cli/cli.py @@ -16,6 +16,7 @@ CLICommandFromFunction, _WrappedCommand, ) +from gaps.cli.documentation import _main_command_help from gaps.cli.preprocessing import preprocess_collect_config from gaps.cli.status import status_command @@ -56,9 +57,8 @@ def convert_to_commands(self): command = as_click_command(command_config) self.commands.append(command) Pipeline.COMMANDS[command_config.name] = command - self.template_configs[command_config.name] = ( - command_config.documentation.template_config - ) + template_config = command_config.documentation.template_config + self.template_configs[command_config.name] = template_config return self def add_pipeline_command(self): @@ -181,6 +181,7 @@ def make_cli(commands, info=None): """ info = info or {} command_generator = _CLICommandGenerator(commands) + commands = command_generator.generate() options = [ click.Option( @@ -189,12 +190,17 @@ def make_cli(commands, info=None): help="Flag to turn on debug logging. Default is not verbose.", ), ] - prog_name = [info["name"]] if "name" in info else [] + + prog_name = info.get("name") + if prog_name: + main_help = _main_command_help( + prog_name, command_generator.command_configs + ) + else: + main_help = "Command Line Interface" + main = click.Group( - help=" ".join(prog_name + ["Command Line Interface"]), - params=options, - callback=_main_cb, - commands=command_generator.generate(), + help=main_help, params=options, callback=_main_cb, commands=commands ) version = info.get("version") if version is not None: diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 8ce8aa8a..6ca69c50 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -49,9 +49,60 @@ ConfigType.TOML, ] +MAIN_DOC = """{name} Command Line Interface. + +Typically, a good place to start is to set up a {name} job with a pipeline +config that points to several {name} modules that you want to run in serial. + +To begin, you can generate some template configuration files using:: + + $ {name} template-configs + +By default, this generates template JSON configuration files, though you +can request YAML or TOML configuration files instead. YOu can run +``$ {name} template-configs --help`` on the command line to see all available +options for the ``template-configs`` command. 
Once the template configuration +files have been generated, you can fill them out by referring to the +module CLI documentation (if available) or the help pages of the module CLIs +for more details on the config options for each CLI command:: + + {cli_help_str} + +After appropriately filling our the configuration files for each module you +want to run, you can call the {name} pipeline CLI using:: + + $ {name} pipeline -c config_pipeline.json + +This command will run each pipeline step in sequence. + +.. Note:: You will need to re-submit the ``pipeline`` command above after +each completed pipeline step. + +To check the status of the pipeline, you can run:: + + $ {name} status + +This will print a report to the command line detailing the progress of the +current pipeline. See ``$ {name} status --help`` for all status command +options. + +If you need to parameterize the pipeline execution, you can use the ``batch`` +command. For details on setting up a batch config file, see the documentation +or run:: + + ${name} batch --help + +on the command line. Once you set up a batch config file, you can execute +it using:: + + $ {name} status -c config_batch.json + +The general structure of the {name} CLI is given below. +""" + PIPELINE_CONFIG_DOC = """ -Path to the "pipeline" configuration file. This argument can be left out, -but *one and only one file* with the name "pipeline" should exist in the +Path to the ``pipeline`` configuration file. This argument can be left out, +but *one and only one file* with "pipeline" in the name should exist in the directory and contain the config information. Below is a sample template config .. tabs:: @@ -87,7 +138,7 @@ """ BATCH_CONFIG_DOC = """ -Path to the "batch" configuration file. Below is a sample template config +Path to the ``batch`` configuration file. Below is a sample template config .. tabs:: @@ -229,8 +280,8 @@ tp use as well as the base AU cost. :allocation: (str) HPC project (allocation) handle. :walltime: (int) Node walltime request in hours. - :qos: (str) Quality-of-service specifier. On Eagle or Kestrel, - this should be one of {{'standby', 'normal', 'high'}}. + :qos: (str, optional) Quality-of-service specifier. On Eagle or + Kestrel, this should be one of {{'standby', 'normal', 'high'}}. Note that 'high' priority doubles the AU cost. By default, ``"normal"``. :memory: (int, optional) Node memory request in GB. 
By default, @@ -512,6 +563,15 @@ def param_required(self, param): return param.default is param.empty +def _main_command_help(prog_name, commands): + """Generate main command help from commands input.""" + cli_help_str = "\n\n ".join( + [f"$ {prog_name} --help"] + + [f"$ {prog_name} {command.name} --help" for command in commands] + ) + return MAIN_DOC.format(name=prog_name, cli_help_str=cli_help_str) + + def _pipeline_command_help(pipeline_config): """Generate pipeline command help from a sample config.""" format_inputs = {} diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index c5fbc053..f06b4a03 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -91,6 +91,13 @@ def test_make_cli(): ]: assert expected_command in main.commands + assert "test" in main.help + assert "$ test --help" in main.help + assert "$ test run --help" in main.help + assert "$ test analyze --help" in main.help + assert "$ test collect-analyze --help" in main.help + assert "$ test collect-run --help" not in main.help + @pytest.mark.integration def test_cli( @@ -124,6 +131,8 @@ def preprocess_run_config(config, project_dir, out_dir): main = make_cli(commands) + assert main.help == "Command Line Interface" + assert not set(tmp_cwd.glob("*")) cli_runner.invoke(main, ["template-configs"]) files = set(tmp_cwd.glob("*")) From 90ea6d8697137a7ec8cc8f828a0fd746376ddf2d Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 22 Jul 2023 14:24:35 -0600 Subject: [PATCH 15/61] Minor docstring updates --- gaps/cli/documentation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 6ca69c50..776440f7 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -59,7 +59,7 @@ $ {name} template-configs By default, this generates template JSON configuration files, though you -can request YAML or TOML configuration files instead. YOu can run +can request YAML or TOML configuration files instead. You can run ``$ {name} template-configs --help`` on the command line to see all available options for the ``template-configs`` command. Once the template configuration files have been generated, you can fill them out by referring to the @@ -267,6 +267,9 @@ {desc} +The general structure for calling this CLI command is given below (add +``--help`` to print help info to the terminal). + """ EXEC_CONTROL_DOC = """ Parameters From e8673c85c042b3d3b8009304fb2083173bd299da Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 22 Jul 2023 14:28:45 -0600 Subject: [PATCH 16/61] More typo fixes --- gaps/cli/documentation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 776440f7..91baf52b 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -76,7 +76,7 @@ This command will run each pipeline step in sequence. .. Note:: You will need to re-submit the ``pipeline`` command above after -each completed pipeline step. + each completed pipeline step. To check the status of the pipeline, you can run:: @@ -90,7 +90,7 @@ command. For details on setting up a batch config file, see the documentation or run:: - ${name} batch --help + $ {name} batch --help on the command line. 
Once you set up a batch config file, you can execute it using:: From 03f7807ba09d25efe48b271807cd457a41ebe596 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 22 Jul 2023 14:57:49 -0600 Subject: [PATCH 17/61] Add extra command help documentation --- gaps/cli/batch.py | 6 +++++- gaps/cli/pipeline.py | 6 +++++- gaps/cli/status.py | 3 +++ gaps/cli/templates.py | 11 ++++++++--- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/gaps/cli/batch.py b/gaps/cli/batch.py index 3853f840..b287dbaf 100644 --- a/gaps/cli/batch.py +++ b/gaps/cli/batch.py @@ -64,7 +64,11 @@ def batch_command(): context_settings=None, callback=_batch, params=params, - help="Execute an analysis pipeline over a parametric set of inputs", + help=( + "Execute an analysis pipeline over a parametric set of inputs.\n\n" + "The general structure for calling this CLI command is given " + "below (add``--help`` to print help info to the terminal)." + ), epilog=None, short_help=None, options_metavar="[OPTIONS]", diff --git a/gaps/cli/pipeline.py b/gaps/cli/pipeline.py index 5288d8d1..ef1dfae7 100644 --- a/gaps/cli/pipeline.py +++ b/gaps/cli/pipeline.py @@ -127,7 +127,11 @@ def pipeline_command(template_config): context_settings=None, callback=pipeline, params=params, - help="""Execute multiple steps in an analysis pipeline""", + help=( + "Execute multiple steps in an analysis pipeline.\n\n" + "The general structure for calling this CLI command is given " + "below (add``--help`` to print help info to the terminal)." + ), epilog=None, short_help=None, options_metavar="[OPTIONS]", diff --git a/gaps/cli/status.py b/gaps/cli/status.py index 9a4e42c7..1f7f8eb0 100644 --- a/gaps/cli/status.py +++ b/gaps/cli/status.py @@ -48,6 +48,9 @@ Display the status of a project FOLDER. By default, the status of the current working directory is displayed. + +The general structure for calling this CLI command is given below +(add``--help`` to print help info to the terminal)." """ diff --git a/gaps/cli/templates.py b/gaps/cli/templates.py index a1893c46..52c241a5 100644 --- a/gaps/cli/templates.py +++ b/gaps/cli/templates.py @@ -16,6 +16,7 @@ logger = logging.getLogger(__name__) + # pylint: disable=redefined-builtin @click.pass_context def _make_template_config(ctx, commands, type, configs): @@ -97,9 +98,13 @@ def template_command(template_configs): configs=template_configs, ), params=params, - help="Generate template config files for requested COMMANDS. If no " - "COMMANDS are given, config files for the entire pipeline are " - "generated.", + help=( + "Generate template config files for requested COMMANDS. If no " + "COMMANDS are given, config files for the entire pipeline are " + "generated.\n\nThe general structure for calling this CLI " + "command is given below (add``--help`` to print help info to " + "the terminal)." + ), epilog=None, short_help=None, options_metavar="", From 1a9c6364251b0725a68450079f21c95103884694 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 22 Jul 2023 18:43:23 -0600 Subject: [PATCH 18/61] Added minor text to doc --- gaps/cli/cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gaps/cli/cli.py b/gaps/cli/cli.py index 087e30ef..0911ef57 100644 --- a/gaps/cli/cli.py +++ b/gaps/cli/cli.py @@ -168,6 +168,8 @@ def make_cli(commands, info=None): name : str Name of program to display at the top of the CLI help. + This input is optional, but specifying it yields richer + documentation for the main CLI command. version : str Include program version with a ``--version`` CLI option. 
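
To make the ``info`` input above concrete, here is a minimal sketch of how a
developer might assemble a CLI with a program name and version. This is only
an illustration of the pattern described in this patch: the ``run_model``
function, its ``resource_file`` parameter, and the ``my-model`` program name
are hypothetical placeholders, not part of ``gaps``::

    from gaps.cli.cli import make_cli
    from gaps.cli.command import CLICommandFromFunction


    def run_model(project_points, resource_file, max_workers=1, tag=None):
        """Run a (hypothetical) model and return the output file path."""
        # `resource_file` and `project_points` would come from the user
        # config; `tag` is supplied by gaps so the output name is unique.
        out_fpath = f"./model_output{tag}.h5"
        # ... model execution for the given project_points goes here ...
        return out_fpath


    commands = [
        CLICommandFromFunction(
            run_model,
            name="run",
            split_keys={"project_points"},
        )
    ]

    # "name" enables the richer main-command help described above;
    # "version" adds a --version option to the generated CLI.
    cli = make_cli(commands, info={"name": "my-model", "version": "0.1.0"})

    if __name__ == "__main__":
        cli()
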
From 87323f95972611db23c5438446eefe70f758df08 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 22 Jul 2023 18:44:10 -0600 Subject: [PATCH 19/61] Documentation updates --- gaps/cli/command.py | 54 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/gaps/cli/command.py b/gaps/cli/command.py index 4d1417af..c339700f 100644 --- a/gaps/cli/command.py +++ b/gaps/cli/command.py @@ -101,6 +101,13 @@ def __init__( configuration files, so it should be thoroughly documented (using a `NumPy Style Python Docstring `_). + In particular, the "Extended Summary" and the "Parameters" + section will be pulled form the docstring. + + .. WARNING:: The "Extended Summary" section may not show up + properly if the short "Summary" section is + missing. + This function must return the path to the output file it generates (or a list of paths if multiple output files are generated). If no output files are generated, the function @@ -146,18 +153,24 @@ def __init__( output filename will contain the tag. Also note that this string *WILL NOT* contain a file-ending, so that will have to be added by the node function. - max_workers : int - Max number of workers to run per node. This option - will be requested from the user within the - "execution_control" dictionary in the config file. If your function is capable of multiprocessing, you should also include ``max_workers`` in the function signature. ``gaps`` will pass an integer equal to the number of processes the user wants to run on a single node for this - value. Note that the ``config`` parameter is not allowed as + value. + + .. WARNING:: The keywords ``{"max-workers", + "sites_per_worker", "mem_util_lim", "timeout", + "pool_size"}`` are assumed to describe + execution control. If you request any of these + as function arguments, users of your CLI will + specify them in the `execution_control` block + of the input config file. + + Note that the ``config`` parameter is not allowed as a function signature item. Please request all the required - keys/inputs directly. This function can also request + keys/inputs directly instead. This function can also request "private" arguments by including a leading underscore in the argument name. These arguments are NOT exposed to users in the documentation or template configuration files. Instead, @@ -370,7 +383,7 @@ def __init__( Parameters ---------- - init : class initializer + init : class The class to be initialized and used to to run on individual nodes. The class must implement ``method``. The initializer, along with the corresponding method, will be used to @@ -378,6 +391,15 @@ def __init__( files, so it should be thoroughly documented (using a `NumPy Style Python Docstring `_). + In particular, the "Extended Summary" (from the ``__init__`` + method *only*) and the "Parameters" section (from *both* the + ``__init__`` method and the run `method` given below) will + be pulled form the docstring. + + .. WARNING:: The "Extended Summary" section may not display + properly if the short "Summary" section in the + ``__init__`` method is missing. + method : str The name of a method of the ``init`` class to act as the model function to run across multiple nodes on the HPC. This @@ -426,18 +448,24 @@ def __init__( output filename will contain the tag. Also note that this string *WILL NOT* contain a file-ending, so that will have to be added by the node function. - max_workers : int - Max number of workers to run per node. 
This option - will be requested from the user within the - "execution_control" dictionary in the config file. If your function is capable of multiprocessing, you should also include ``max_workers`` in the function signature. ``gaps`` will pass an integer equal to the number of processes the user wants to run on a single node for this - value. Note that the ``config`` parameter is not allowed as + value. + + .. WARNING:: The keywords ``{"max-workers", + "sites_per_worker", "mem_util_lim", "timeout", + "pool_size"}`` are assumed to describe + execution control. If you request any of these + as function arguments, users of your CLI will + specify them in the `execution_control` block + of the input config file. + + Note that the ``config`` parameter is not allowed as a function signature item. Please request all the required - keys/inputs directly. This function can also request + keys/inputs directly instead. This function can also request "private" arguments by including a leading underscore in the argument name. These arguments are NOT exposed to users in the documentation or template configuration files. Instead, From 6975ec1a676c29abeba418046c620f02dcb60167 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 1 Aug 2023 13:21:22 -0600 Subject: [PATCH 20/61] Update README with more info about target audience --- README.rst | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index e407e470..9b9af39e 100644 --- a/README.rst +++ b/README.rst @@ -24,21 +24,37 @@ Welcome to Geospatial Analysis Pipelines (GAPs)! .. inclusion-intro Geospatial Analysis Pipelines (GAPs) is a framework designed -to assist users in scaling their geospatial models to a -High-Performance Computing (HPC) environment. In particular, -GAPs automatically distributes the execution of a -single-location model (such as the `System Advisor Model `_) -over a large geospatial extent (e.g. CONUS) across many parallel -HPC nodes. Born from the open-source `reV `_ model, GAPs is a +to assist researchers and software developers add execution +tools to their geospatial python models. Born from the +open-source `reV `_ model, GAPs is a robust and easy-to-use engine that provides a rich set of features -such as configuration file generation, job status monitoring, -CLI Documentation, and more. - +such as command-line interface (CLI) generation and documentation, +basic High-Performance Computing (HPC) scaling capabilities, +configuration file generation, job status monitoring, and more. + + +Who should use GAPs +=================== +GAPs is intended to be used by researchers and/or software developers +who have implemented a working python model but have not yet added any +external model execution tools. Within minimal effort, developers can +use GAPs to add a variety of utility for end-users, including a complete +set of CLI commands and documentation pulled from the model run function +docstrings. In addition, GAPs provides basic HPC execution capabilities, +particularly catered towards embarrassingly parallel geospatial models +(e.g. single-location models such as the `System Advisor Model `_). +GAPs can automatically distribute the execution of such models over a large +geospatial extent (e.g. CONUS) across many parallel HPC nodes. + +GAPs is **NOT** a workflow management system (WMS), and therefore does not +provide any of the in-depth tools/capabilities expected from a proper WMS. 
+However, GAPs-supported models can sometimes be included as part of the workflow in +WMS tools like `Torc `_. To get started, take a look at the `documentation `_ (examples coming soon!) -Installing gaps +Installing GAPs =============== NOTE: The installation instruction below assume that you have python installed From 69784236fe41bf3e5abbb34c7293605abc783e15 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 1 Aug 2023 18:12:12 -0600 Subject: [PATCH 21/61] Add explanation about `tag` to docs --- gaps/cli/command.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/gaps/cli/command.py b/gaps/cli/command.py index c339700f..675349a5 100644 --- a/gaps/cli/command.py +++ b/gaps/cli/command.py @@ -203,7 +203,13 @@ def __init__( function *must* return the path (as a string) to the output file it generates in order for users to be able to use ``"PIPELINE"`` as the input to the ``collect_pattern`` key - in the collection config. By default, ``False``. + in the collection config. The path returned by this function + must also include the ``tag`` in the output file name in + order for collection to function properly (an easy way to do + this is to request ``tag`` in the function signature and + name the output file generated by the function using the + ``f"{out_file_name}{tag}.{extension}"`` format). + By default, ``False``. split_keys : set | container, optional A set of strings identifying the names of the config keys that ``gaps`` should split the function execution on. To @@ -497,7 +503,13 @@ def __init__( function *must* return the path (as a string) to the output file it generates in order for users to be able to use ``"PIPELINE"`` as the input to the ``collect_pattern`` key - in the collection config. By default, ``False``. + in the collection config. The path returned by this function + must also include the ``tag`` in the output file name in + order for collection to function properly (an easy way to do + this is to request ``tag`` in the function signature and + name the output file generated by the function using the + ``f"{out_file_name}{tag}.{extension}"`` format). + By default, ``False``. split_keys : set | container, optional A set of strings identifying the names of the config keys that ``gaps`` should split the function execution on. To From b7bee0e519975c80656ba3dd34a7c2fd2c8b674d Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 1 Aug 2023 18:14:42 -0600 Subject: [PATCH 22/61] Added extra tests for pipeline specification --- gaps/pipeline.py | 37 ++++++++++++++++---- tests/cli/test_cli_pipeline.py | 16 ++++++--- tests/cli/test_cli_preprocesing.py | 3 ++ tests/test_pipeline.py | 56 ++++++++++++++++++++++++------ 4 files changed, 90 insertions(+), 22 deletions(-) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index 26e900b8..fb31ab37 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -49,13 +49,7 @@ def __init__(self, pipeline, monitor=True): self._out_dir = self._out_dir.as_posix() config = load_config(pipeline) - - if "pipeline" not in config: - raise gapsConfigError( - 'Could not find required key "pipeline" in the ' - "pipeline config." 
- ) - self._run_list = config["pipeline"] + self._run_list = _check_pipeline(config) self._init_status() init_logging_from_config(config) @@ -235,6 +229,35 @@ def run(cls, pipeline, monitor=True): cls(pipeline, monitor=monitor)._main() +def _check_pipeline(config): + """Check pipeline steps input.""" + + if "pipeline" not in config: + raise gapsConfigError( + "Could not find required key " + '"pipeline" ' + "in the pipeline config." + ) + + pipeline = config["pipeline"] + + if not isinstance(pipeline, list): + raise gapsConfigError( + 'Config arg "pipeline" must be a list of ' + f"{{command: f_config}} pairs, but received {type(pipeline)}." + ) + + for step_dict in pipeline: + for f_config in step_dict.values(): + if not Path(f_config).expanduser().resolve().exists(): + raise gapsConfigError( + "Pipeline step depends on non-existent " + f"file: {f_config}" + ) + + return pipeline + + def _parse_code_array(arr): """Parse array of return codes to get single return code for command.""" diff --git a/tests/cli/test_cli_pipeline.py b/tests/cli/test_cli_pipeline.py index a74df1bd..027142c3 100644 --- a/tests/cli/test_cli_pipeline.py +++ b/tests/cli/test_cli_pipeline.py @@ -30,6 +30,7 @@ {"run": "./config_run.json"}, ], } +SUCCESS_CONFIG = {"test": "success"} @pytest.fixture @@ -47,7 +48,9 @@ def runnable_pipeline(): def run(config): """Test command.""" config_fp = Path(config) - config_fp.touch() + with open(config_fp, "w") as config_file: + json.dump(SUCCESS_CONFIG, config_file) + attrs = {StatusField.JOB_STATUS: StatusOption.SUCCESSFUL} Status.make_single_job_file(config_fp.parent, "run", "test", attrs) @@ -85,11 +88,12 @@ def test_pipeline_command( """Test the pipeline_command creation.""" target_config_fp = tmp_path / "config_run.json" + target_config_fp.touch() + pipe_config_fp = tmp_path / "config_pipe.json" with open(pipe_config_fp, "w") as config_file: json.dump(SAMPLE_CONFIG, config_file) - assert not target_config_fp.exists() pipe = pipeline_command({}) if _can_run_background(): assert "background" in [opt.name for opt in pipe.params] @@ -100,7 +104,8 @@ def test_pipeline_command( if not extra_args: cli_runner.invoke(pipe, ["-c", pipe_config_fp.as_posix()]) - assert target_config_fp.exists() + with open(target_config_fp, "r") as config: + assert json.load(config) == SUCCESS_CONFIG assert_message_was_logged("Pipeline job", "INFO") assert_message_was_logged("is complete.", "INFO") @@ -164,7 +169,7 @@ def test_ppl_command_no_config_arg( target_config_fp = tmp_cwd / "config_run.json" pipe_config_fp = tmp_cwd / "config_pipeline.json" - assert not target_config_fp.exists() + target_config_fp.touch() assert not pipe_config_fp.exists() pipe = pipeline_command({}) result = cli_runner.invoke(pipe) @@ -176,7 +181,8 @@ def test_ppl_command_no_config_arg( json.dump(SAMPLE_CONFIG, config_file) cli_runner.invoke(pipe) - assert target_config_fp.exists() + with open(target_config_fp, "r") as config: + assert json.load(config) == SUCCESS_CONFIG cli_runner.invoke(pipe) assert_message_was_logged("Pipeline job", "INFO") diff --git a/tests/cli/test_cli_preprocesing.py b/tests/cli/test_cli_preprocesing.py index dfb374aa..02e2c470 100644 --- a/tests/cli/test_cli_preprocesing.py +++ b/tests/cli/test_cli_preprocesing.py @@ -84,6 +84,9 @@ def test_preprocess_collect_config_pipeline_input(tmp_path): with open(config_fp, "w") as file_: json.dump(SAMPLE_CONFIG, file_) + (tmp_path / "config.json").touch() + (tmp_path / "collect_config.json").touch() + Pipeline(config_fp) job_files = [ diff --git 
a/tests/test_pipeline.py b/tests/test_pipeline.py index de7578a7..1e24112f 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -38,8 +38,8 @@ SAMPLE_CONFIG = { "logging": {"log_level": "INFO"}, "pipeline": [ - {"run": "./config.json"}, - {"collect-run": "./collect_config.json"}, + {"run": "./config.{config_type}"}, + {"collect-run": "./collect_config.{config_type}"}, ], } @@ -53,17 +53,31 @@ def submit_call_cache(): @pytest.fixture def sample_config_fp_type(tmp_path): """Generate a sample config type and corresponding fp.""" - config_type = random.choice(list(ConfigType)) + types = list(ConfigType) + config_type = random.choice(types) config_fp = tmp_path / f"pipe_config.{config_type}" - return config_type, config_fp + + sample_config = { + "logging": {"log_level": "INFO"}, + "pipeline": [ + {"run": (tmp_path / "config.json").as_posix()}, + {"collect-run": (tmp_path / "collect_config.json").as_posix()}, + ], + } + return config_type, config_fp, sample_config @pytest.fixture def sample_pipeline_config(sample_config_fp_type): """Write a sample pipeline config for use in tests.""" - config_type, config_fp = sample_config_fp_type + config_type, config_fp, sample_config = sample_config_fp_type with open(config_fp, "w") as file_: - config_type.dump(SAMPLE_CONFIG, file_) + config_type.dump(sample_config, file_) + + for step_dict in sample_config["pipeline"]: + for step_config_fp in step_dict.values(): + Path(step_config_fp).touch() + return config_fp @@ -99,9 +113,28 @@ def test_pipeline_init_bad_config(tmp_path): config_fp = tmp_path / "pipe_config.json" with open(config_fp, "w") as file_: json.dump({}, file_) - with pytest.raises(gapsConfigError): + + with pytest.raises(gapsConfigError) as exc_info: + Pipeline(config_fp) + + assert "Could not find required key" in str(exc_info) + + with open(config_fp, "w") as file_: + json.dump({"pipeline": "./run_config.json"}, file_) + + with pytest.raises(gapsConfigError) as exc_info: + Pipeline(config_fp) + + assert "must be a list" in str(exc_info) + + with open(config_fp, "w") as file_: + json.dump({"pipeline": [{"run": "./dne_config.json"}]}, file_) + + with pytest.raises(gapsConfigError) as exc_info: Pipeline(config_fp) + assert "depends on non-existent file" in str(exc_info) + def test_pipeline_init(sample_pipeline_config, assert_message_was_logged): """Test initializing Pipeline.""" @@ -156,6 +189,9 @@ def test_pipeline_submit(tmp_path, runnable_pipeline): with open(config_fp, "w") as file_: config_type.dump(sample_config, file_) + for fn in ["config.json", "collect_config.json", "dne_config.json"]: + (tmp_path / fn).touch() + pipeline = Pipeline(config_fp) pipeline._submit(0) assert runnable_pipeline[-1] == (tmp_path / "config.json").as_posix() @@ -617,7 +653,7 @@ def test_parse_previous_status(sample_pipeline_config): with pytest.warns(gapsWarning): assert not parse_previous_status(sample_pipeline_config.parent, "run") - assert len(list(sample_pipeline_config.parent.glob("*"))) == 2 + assert len(list(sample_pipeline_config.parent.glob("*"))) == 4 Status.make_single_job_file( sample_pipeline_config.parent, command="run", @@ -638,13 +674,13 @@ def test_parse_previous_status(sample_pipeline_config): StatusField.OUT_FILE: ["another_test.h5", "a_third.h5"], }, ) - assert len(list(sample_pipeline_config.parent.glob("*"))) == 4 + assert len(list(sample_pipeline_config.parent.glob("*"))) == 6 out_files = parse_previous_status( sample_pipeline_config.parent, "collect-run" ) assert set(out_files) == {"test.h5", "another_test.h5", 
"a_third.h5"} - assert len(list(sample_pipeline_config.parent.glob("*"))) == 4 + assert len(list(sample_pipeline_config.parent.glob("*"))) == 6 ids = parse_previous_status( sample_pipeline_config.parent, "collect-run", key=StatusField.JOB_ID ) From bdd31177d6622606c808118f33df07dcbc9eb1a3 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 1 Aug 2023 18:47:35 -0600 Subject: [PATCH 23/61] Add `split_keys=None` test --- tests/cli/test_cli_config.py | 58 ++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/cli/test_cli_config.py b/tests/cli/test_cli_config.py index c2ba406a..5d3e11e5 100644 --- a/tests/cli/test_cli_config.py +++ b/tests/cli/test_cli_config.py @@ -374,6 +374,64 @@ def _test_kickoff(ctx, cmd, **kwargs): ) +@pytest.mark.parametrize("test_class", [False, True]) +def test_run_no_split_keys(test_ctx, runnable_script, monkeypatch, test_class): + """Test the `run` function with no split keys specified.""" + + tmp_path = test_ctx.obj["TMP_PATH"] + + if test_class: + command_config = CLICommandFromClass( + TestCommand, + "run", + name="run", + split_keys=None, + ) + else: + command_config = CLICommandFromFunction( + _testing_function, + name="run", + split_keys=None, + ) + + config = { + "execution_control": { + "option": "eagle", + "allocation": "test", + "walltime": 1, + "max_workers": 1, + }, + "input1": 1, + "input2": 7, + "input3": 8, + "_z_0": ["unsorted", "strings"], + "project_points": [0, 1, 2, 4], + } + + config_fp = tmp_path / "config.json" + with open(config_fp, "w") as config_file: + json.dump(config, config_file) + + job_names_cache = {} + + def _test_kickoff(ctx, cmd, **kwargs): + job_names_cache[ctx.obj["NAME"]] = cmd + + monkeypatch.setattr( + gaps.cli.execution, "_kickoff_hpc_job", _test_kickoff, raising=True + ) + + assert len(job_names_cache) == 0 + from_config(config_fp, command_config) + assert len(job_names_cache) == 1 + assert len(set(job_names_cache)) == 1 + + for job_name, script in job_names_cache.items(): + assert "_j0" not in job_name + assert "[0, 1, 2, 4]" in script + assert '["unsorted", "strings"]' in script + + @pytest.mark.parametrize("test_class", [False, True]) @pytest.mark.parametrize("num_nodes", [30, 100]) def test_warning_about_au_usage( From bfa508abc13357f373aacb38eb48feeae5bce65b Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 1 Aug 2023 19:02:08 -0600 Subject: [PATCH 24/61] Added test and docs for empty split key input --- gaps/cli/command.py | 22 +++++++++---- tests/cli/test_cli_config.py | 60 ++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 6 deletions(-) diff --git a/gaps/cli/command.py b/gaps/cli/command.py index 675349a5..0c0c07eb 100644 --- a/gaps/cli/command.py +++ b/gaps/cli/command.py @@ -235,9 +235,14 @@ def __init__( "paired" keys match. To allow non-iterable user input for split keys, use the ``config_preprocessor`` argument to specify a preprocessing function that converts the user - input into a list of the expected inputs. If ``None``, - execution is not split across nodes, and a single node is - always used for the function call. By default, ``None``. + input into a list of the expected inputs. If users specify + an empty list or ``None`` for a key in ``split_keys``, then + GAPs will pass ``None`` as the value for that key (i.e. if + ``split_keys=["a"]`` and users specify ``"a": []`` in their + config, then the ``function`` will be called with + ``a=None``). If ``None``, execution is not split across + nodes, and a single node is always used for the function + call. 
By default, ``None``. config_preprocessor : callable, optional Optional function for configuration pre-processing. The preprocessing step occurs before jobs are split across HPC @@ -535,9 +540,14 @@ def __init__( "paired" keys match. To allow non-iterable user input for split keys, use the ``config_preprocessor`` argument to specify a preprocessing function that converts the user - input into a list of the expected inputs. If ``None``, - execution is not split across nodes, and a single node is - always used for the function call. By default, ``None``. + input into a list of the expected inputs. If users specify + an empty list or ``None`` for a key in ``split_keys``, then + GAPs will pass ``None`` as the value for that key (i.e. if + ``split_keys=["a"]`` and users specify ``"a": []`` in their + config, then the ``function`` will be called with + ``a=None``). If ``None``, execution is not split across + nodes, and a single node is always used for the function + call. By default, ``None``. config_preprocessor : callable, optional Optional function for configuration pre-processing. The preprocessing step occurs before jobs are split across HPC diff --git a/tests/cli/test_cli_config.py b/tests/cli/test_cli_config.py index 5d3e11e5..f0718084 100644 --- a/tests/cli/test_cli_config.py +++ b/tests/cli/test_cli_config.py @@ -432,6 +432,66 @@ def _test_kickoff(ctx, cmd, **kwargs): assert '["unsorted", "strings"]' in script +@pytest.mark.parametrize("test_class", [False, True]) +def test_run_empty_split_keys( + test_ctx, runnable_script, monkeypatch, test_class +): + """Test the `run` function with empty split keys input.""" + + tmp_path = test_ctx.obj["TMP_PATH"] + + if test_class: + command_config = CLICommandFromClass( + TestCommand, + "run", + name="run", + split_keys=["_z_0"], + ) + else: + command_config = CLICommandFromFunction( + _testing_function, + name="run", + split_keys=["_z_0"], + ) + + config = { + "execution_control": { + "option": "eagle", + "allocation": "test", + "walltime": 1, + "max_workers": 1, + }, + "input1": 1, + "input2": 7, + "input3": 8, + "_z_0": [], + "project_points": [0, 1, 2, 4], + } + + config_fp = tmp_path / "config.json" + with open(config_fp, "w") as config_file: + json.dump(config, config_file) + + job_names_cache = {} + + def _test_kickoff(ctx, cmd, **kwargs): + job_names_cache[ctx.obj["NAME"]] = cmd + + monkeypatch.setattr( + gaps.cli.execution, "_kickoff_hpc_job", _test_kickoff, raising=True + ) + + assert len(job_names_cache) == 0 + from_config(config_fp, command_config) + assert len(job_names_cache) == 1 + assert len(set(job_names_cache)) == 1 + + for job_name, script in job_names_cache.items(): + assert "_j0" not in job_name + assert "[0, 1, 2, 4]" in script + assert '"_z_0": None' in script + + @pytest.mark.parametrize("test_class", [False, True]) @pytest.mark.parametrize("num_nodes", [30, 100]) def test_warning_about_au_usage( From 1acf53cf6fcb2c2e21632af7667579c5ac44324d Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 3 Aug 2023 14:02:12 -0600 Subject: [PATCH 25/61] Log messages no longer propagate to root logger --- gaps/__init__.py | 1 + tests/conftest.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/gaps/__init__.py b/gaps/__init__.py index a078140d..2d66ae1f 100644 --- a/gaps/__init__.py +++ b/gaps/__init__.py @@ -15,3 +15,4 @@ logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) logger.setLevel("DEBUG") +logger.propagate = False diff --git a/tests/conftest.py b/tests/conftest.py index 
821f5687..bba0b120 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,13 +10,19 @@ import pytest from click.testing import CliRunner -from gaps import TEST_DATA_DIR +from gaps import TEST_DATA_DIR, logger from gaps.collection import find_h5_files LOGGING_META_FILES = {"log.py", "exceptions.py", "warnings.py"} +@pytest.fixture(autouse=True) +def include_logging(): + """Make the gaps logger propagate calls to the root.""" + logger.propagate = True + + @pytest.fixture def assert_message_was_logged(caplog): """Assert that a particular (partial) message was logged.""" From 1b08c1dedf2164f21944b33b819869eab77920b3 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 3 Aug 2023 14:02:33 -0600 Subject: [PATCH 26/61] More detailed logger print statement --- gaps/log.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/gaps/log.py b/gaps/log.py index 1d483d96..ded22726 100644 --- a/gaps/log.py +++ b/gaps/log.py @@ -190,13 +190,16 @@ def print_logging_info(): logger_names = [__name__.split(".", maxsplit=1)[0]] for name in logger_names: print(f"LOGGER: {name!r}") - log_to_debug = logging.getLogger(name) - while log_to_debug is not None: + logger_to_debug = logging.getLogger(name) + while logger_to_debug is not None: print( - f"level: {log_to_debug.level}, name: {log_to_debug.name}," - f"handlers: {log_to_debug.handlers}" + f"level: {logger_to_debug.level}, " + f"name: {logger_to_debug.name}, " + f"handlers: " ) - log_to_debug = log_to_debug.parent + for handler in logger_to_debug.handlers: + print(f"\t +++ {handler.name} - {handler}") + logger_to_debug = logger_to_debug.parent def print_logging_info_all_libraries(): From 102306d3bf444c36469093f316ebbfa3b87f5196 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 3 Aug 2023 17:00:27 -0600 Subject: [PATCH 27/61] Updated variable name to `memory_utilization_limit` --- gaps/cli/command.py | 26 ++++++++++++++------------ gaps/cli/documentation.py | 2 +- gaps/collection.py | 24 ++++++++++++------------ tests/cli/test_cli_documentation.py | 20 ++++++++++---------- tests/test_collection.py | 4 +++- 5 files changed, 40 insertions(+), 36 deletions(-) diff --git a/gaps/cli/command.py b/gaps/cli/command.py index 0c0c07eb..8cdb95f1 100644 --- a/gaps/cli/command.py +++ b/gaps/cli/command.py @@ -161,12 +161,13 @@ def __init__( value. .. WARNING:: The keywords ``{"max-workers", - "sites_per_worker", "mem_util_lim", "timeout", - "pool_size"}`` are assumed to describe - execution control. If you request any of these - as function arguments, users of your CLI will - specify them in the `execution_control` block - of the input config file. + "sites_per_worker", "memory_utilization_limit", + "timeout", "pool_size"}`` are assumed to + describe execution control. If you request any + of these as function arguments, users of your + CLI will specify them in the + `execution_control` block of the input config + file. Note that the ``config`` parameter is not allowed as a function signature item. Please request all the required @@ -467,12 +468,13 @@ def __init__( value. .. WARNING:: The keywords ``{"max-workers", - "sites_per_worker", "mem_util_lim", "timeout", - "pool_size"}`` are assumed to describe - execution control. If you request any of these - as function arguments, users of your CLI will - specify them in the `execution_control` block - of the input config file. + "sites_per_worker", "memory_utilization_limit", + "timeout", "pool_size"}`` are assumed to + describe execution control. 
If you request any + of these as function arguments, users of your + CLI will specify them in the + `execution_control` block of the input config + file. Note that the ``config`` parameter is not allowed as a function signature item. Please request all the required diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 91baf52b..28b2d765 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -28,7 +28,7 @@ EXTRA_EXEC_PARAMS = { "max_workers": "Maximum number of parallel workers run on each node.", "sites_per_worker": "Number of sites to run in series on a worker.", - "mem_util_lim": ( + "memory_utilization_limit": ( "Memory utilization limit (fractional). Must be a value between 0 " "and 100. This input sets how much data will be stored in-memory " "at any given time before flushing to disk." diff --git a/gaps/collection.py b/gaps/collection.py index 8f5e925f..813ba132 100644 --- a/gaps/collection.py +++ b/gaps/collection.py @@ -56,7 +56,7 @@ def __init__( gids, dataset_in, dataset_out=None, - mem_util_lim=0.7, + memory_utilization_limit=0.7, pass_through=False, ): """ @@ -74,7 +74,7 @@ def __init__( Name of dataset into which collected data is to be written. If `None` the name of the output dataset is assumed to match the dataset input name. By default, `None`. - mem_util_lim : float, optional + memory_utilization_limit : float, optional Memory utilization limit (fractional). This sets how many sites will be collected at a time. By default, `0.7`. pass_through : bool, optional @@ -96,7 +96,7 @@ def __init__( self._dataset_out = dataset_out tot_mem = psutil.virtual_memory().total - self._mem_avail = mem_util_lim * tot_mem + self._mem_avail = memory_utilization_limit * tot_mem self._axis, self._site_mem_req = self._pre_collect() logger.debug( @@ -362,7 +362,7 @@ def collect_dataset( gids, dataset_in, dataset_out=None, - mem_util_lim=0.7, + memory_utilization_limit=0.7, pass_through=False, ): """Collect a dataset from multiple source files into a single file. @@ -381,7 +381,7 @@ def collect_dataset( Name of dataset into which collected data is to be written. If `None` the name of the output dataset is assumed to match the dataset input name. By default, `None`. - mem_util_lim : float, optional + memory_utilization_limit : float, optional Memory utilization limit (fractional). This sets how many sites will be collected at a time. By default, `0.7`. pass_through : bool, optional @@ -395,7 +395,7 @@ def collect_dataset( gids, dataset_in, dataset_out=dataset_out, - mem_util_lim=mem_util_lim, + memory_utilization_limit=memory_utilization_limit, pass_through=pass_through, ) collector._collect() @@ -623,7 +623,7 @@ def collect( self, dataset_in, dataset_out=None, - mem_util_lim=0.7, + memory_utilization_limit=0.7, pass_through=False, ): """Collect a dataset from h5_dir to h5_file @@ -637,7 +637,7 @@ def collect( Name of dataset into which collected data is to be written. If `None` the name of the output dataset is assumed to match the dataset input name. By default, `None`. - mem_util_lim : float + memory_utilization_limit : float Memory utilization limit (fractional). This sets how many sites will be collected at a time. By default, `0.7`. 
pass_through : bool @@ -670,7 +670,7 @@ def collect( self.gids, dataset_in, dataset_out=dataset_out, - mem_util_lim=mem_util_lim, + memory_utilization_limit=memory_utilization_limit, pass_through=pass_through, ) @@ -687,7 +687,7 @@ def add_dataset( collect_pattern, dataset_in, dataset_out=None, - mem_util_lim=0.7, + memory_utilization_limit=0.7, pass_through=False, ): """Collect and add a dataset to a single HDF5 file. @@ -708,7 +708,7 @@ def add_dataset( Name of dataset into which collected data is to be written. If `None` the name of the output dataset is assumed to match the dataset input name. By default, `None`. - mem_util_lim : float + memory_utilization_limit : float Memory utilization limit (fractional). This sets how many sites will be collected at a time. By default, `0.7`. pass_through : bool @@ -735,7 +735,7 @@ def add_dataset( collector.collect( dataset_in, dataset_out=dataset_out, - mem_util_lim=mem_util_lim, + memory_utilization_limit=memory_utilization_limit, pass_through=pass_through, ) diff --git a/tests/cli/test_cli_documentation.py b/tests/cli/test_cli_documentation.py index 16f7dc11..0cb6e03f 100644 --- a/tests/cli/test_cli_documentation.py +++ b/tests/cli/test_cli_documentation.py @@ -36,7 +36,7 @@ def test_command_documentation_copies_skip_params(): def test_command_documentation_extra_exec_params(): """Test the `CommandDocumentation` with extra exec params.""" - def func(max_workers, sites_per_worker, mem_util_lim, timeout, pool_size): + def func(max_workers, sites_per_worker, memory_utilization_limit, timeout, pool_size): """A short description. Parameters @@ -45,7 +45,7 @@ def func(max_workers, sites_per_worker, mem_util_lim, timeout, pool_size): Number of workers to run. sites_per_worker : float Number of sites to run. - mem_util_lim : str + memory_utilization_limit : str A test documentation. timeout : dict A timeout value. @@ -56,7 +56,7 @@ def func(max_workers, sites_per_worker, mem_util_lim, timeout, pool_size): expected_parameters = [ "max_workers", "sites_per_worker", - "mem_util_lim", + "memory_utilization_limit", "timeout", "pool_size", ] @@ -86,13 +86,13 @@ def func(max_workers, sites_per_worker, mem_util_lim, timeout, pool_size): def test_command_documentation_extra_exec_params_no_user_doc(): """Test the `CommandDocumentation` with extra exec params no user doc.""" - def func(max_workers, sites_per_worker, mem_util_lim, timeout, pool_size): + def func(max_workers, sites_per_worker, memory_utilization_limit, timeout, pool_size): """A short description.""" expected_parameters = [ "max_workers", "sites_per_worker", - "mem_util_lim", + "memory_utilization_limit", "timeout", "pool_size", ] @@ -115,7 +115,7 @@ def test_command_documentation_extra_exec_params_user_defaults(): def func( max_workers=2, sites_per_worker=0.4, - mem_util_lim="test", + memory_utilization_limit="test", timeout=None, pool_size=None, ): @@ -127,7 +127,7 @@ def func( Number of workers to run. By default, ``2``. sites_per_worker : float, optional Number of sites to run. By default, ``0.4``. - mem_util_lim : str, optional + memory_utilization_limit : str, optional A test documentation. By default, ``"test"``. timeout : dict, optional A timeout value. By default, ``None``. 
@@ -138,7 +138,7 @@ def func( expected_parameters = [ "max_workers", "sites_per_worker", - "mem_util_lim", + "memory_utilization_limit", "timeout", "pool_size", ] @@ -180,7 +180,7 @@ def test_command_documentation_extra_exec_params_defaults_no_docs(): def func( max_workers=2, sites_per_worker=0.4, - mem_util_lim="test", + memory_utilization_limit="test", timeout=None, pool_size=None, ): @@ -189,7 +189,7 @@ def func( expected_parameters = [ "max_workers", "sites_per_worker", - "mem_util_lim", + "memory_utilization_limit", "timeout", "pool_size", ] diff --git a/tests/test_collection.py b/tests/test_collection.py index d3918513..a7864515 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -372,7 +372,9 @@ def test_low_mem_collect(tmp_path, collect_pattern, points_path): collect_dir, pattern = collect_pattern ctc = Collector(h5_file, collect_dir / pattern, points_path) - ctc.collect("cf_profile", dataset_out=None, mem_util_lim=0.00002) + ctc.collect( + "cf_profile", dataset_out=None, memory_utilization_limit=0.00002 + ) with h5py.File(h5_file, "r") as collected_outputs: assert "cf_profile" in collected_outputs From 63cb9a24e37118796c02a450b879f54c6edffb6f Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 3 Aug 2023 17:16:35 -0600 Subject: [PATCH 28/61] Updated pipeline log docs --- gaps/cli/documentation.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 28b2d765..d0aed8e8 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -133,7 +133,13 @@ command. logging : dict, optional Dictionary containing keyword-argument pairs to pass to - :func:`gaps.log.init_logger`. + `init_logger `_. This initializes + logging for the submission portion of the pipeline. Note, however, + that each step (command) will **also** record the submission step + log output to a common "project" log file, so it's only ever + necessary to use this input if you want a different (lower) level of + verbosity than the `log_level` specified in the config for the step + of the pipeline being executed. 
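
For reference, the pipeline configs exercised in the test suite pair this
``logging`` input with the ``pipeline`` step list. The sketch below writes
such a config the same way the tests do; the step config file paths are
placeholders::

    import json

    pipeline_config = {
        # Only affects verbosity of the submission portion of the pipeline;
        # each step's own "log_level" still governs that step's logs.
        "logging": {"log_level": "DEBUG"},
        "pipeline": [
            {"run": "./config_run.json"},
            {"collect-run": "./config_collect_run.json"},
        ],
    }

    with open("./config_pipeline.json", "w") as config_file:
        json.dump(pipeline_config, config_file, indent=4)
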
""" From e66907c5338672511b8f377ba837812ff2a00e45 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Fri, 4 Aug 2023 11:11:19 -0600 Subject: [PATCH 29/61] Fix bug with stdout not returning properly --- gaps/hpc.py | 4 ++-- tests/test_hpc.py | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/gaps/hpc.py b/gaps/hpc.py index 7649585d..bf0df633 100644 --- a/gaps/hpc.py +++ b/gaps/hpc.py @@ -256,7 +256,7 @@ def _teardown_submission(self, name, out, err, keep_sh=False): def _mark_job_as_submitted(self, name, out): """Mark job as submitted in the queue.""" - job_id = int(out.split(" ")[-1]) + job_id = int(_job_id_or_out(out).split(" ")[-1]) out = str(job_id) logger.debug("Job %r with id #%s submitted successfully", name, job_id) self._queue[job_id] = { @@ -655,7 +655,7 @@ def submit(cmd, background=False, background_stdout=False): return "", "" stdout, stderr = _subprocess_popen(cmd) - return _job_id_or_out(stdout), stderr + return stdout, stderr def format_walltime(hours=None): diff --git a/tests/test_hpc.py b/tests/test_hpc.py index 4f31dab7..2209fd38 100644 --- a/tests/test_hpc.py +++ b/tests/test_hpc.py @@ -286,6 +286,7 @@ def test_submit(monkeypatch): cmd = "python -c \"print('hello world')\"" cmd_id = "python -c \"print('Job ID is 12342')\"" cmd_err = "python -c \"raise ValueError('An error occurred')\"" + cmd_squeue = f'python -c "print({SQUEUE_RAW!r})"' out, err = submit(cmd, background=False, background_stdout=False) assert out == "hello world" @@ -296,7 +297,11 @@ def test_submit(monkeypatch): assert not err out, err = submit(cmd_id, background=False, background_stdout=False) - assert out == "12342" + assert out == "Job ID is 12342" + assert not err + + out, err = submit(cmd_squeue, background=False, background_stdout=False) + assert out == SQUEUE_RAW.replace("\n", "\r\n") assert not err with pytest.raises(OSError): From 7507930b6fc7d7226a32e3176ca1b3436d0703ab Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Fri, 4 Aug 2023 12:38:20 -0600 Subject: [PATCH 30/61] Update documentation --- gaps/pipeline.py | 6 ++++++ gaps/status.py | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index fb31ab37..6fb58b4f 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -311,6 +311,12 @@ def _sort_key(status_entry): def parse_previous_status(status_dir, command, key=StatusField.OUT_FILE): """Parse key from job status(es) from the previous pipeline step. + This command DOES NOT check the HPC queue for jobs and therefore + DOES NOT update the status of previously running jobs that have + errored out of the HPC queue. For best results, ensure that all + previous steps of a pipeline have finished processing before calling + this function. + Parameters ---------- status_dir : path-like diff --git a/gaps/status.py b/gaps/status.py index 2dfa8860..8f0bbc89 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -587,6 +587,10 @@ def parse_command_status( ): """Parse key from job status(es) from the given command. + This command DOES NOT check the HPC queue for jobs and therefore + DOES NOT update the status of previously running jobs that have + errored out of the HPC queue. 
+ Parameters ---------- status_dir : path-like From c79e175681e15e388d60d71dd92422db78690f78 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 5 Aug 2023 14:50:00 -0600 Subject: [PATCH 31/61] Major update with multiple bug fixes --- gaps/cli/execution.py | 6 +-- gaps/legacy.py | 28 ++++++++++ gaps/pipeline.py | 2 +- gaps/status.py | 84 ++++++++++++++++++++--------- tests/cli/test_cli_config.py | 94 ++++++++++++++------------------- tests/cli/test_cli_execution.py | 79 ++++++++++++++++++++++++++- tests/cli/test_cli_status.py | 84 +++++++++++++++++++++++++++++ tests/test_hpc.py | 30 +++++++++-- tests/test_legacy.py | 34 +++++++++++- tests/test_pipeline.py | 2 +- tests/test_status.py | 54 +++++++++++++++---- 11 files changed, 397 insertions(+), 100 deletions(-) diff --git a/gaps/cli/execution.py b/gaps/cli/execution.py index e3e53c6c..eb0ba910 100644 --- a/gaps/cli/execution.py +++ b/gaps/cli/execution.py @@ -61,7 +61,7 @@ def kickoff_job(ctx, cmd, exec_kwargs): exec_kwargs = _filter_exec_kwargs( exec_kwargs, hardware_option.manager.make_script_str, hardware_option ) - _kickoff_hpc_job(ctx, cmd, **exec_kwargs) + _kickoff_hpc_job(ctx, cmd, hardware_option, **exec_kwargs) def _filter_exec_kwargs(kwargs, func, hardware_option): @@ -138,7 +138,7 @@ def _kickoff_local_job(ctx, cmd): logger.info(msg) -def _kickoff_hpc_job(ctx, cmd, **kwargs): +def _kickoff_hpc_job(ctx, cmd, hardware_option, **kwargs): """Run a job (command) on the HPC.""" if not _should_run(ctx): @@ -159,7 +159,7 @@ def _kickoff_hpc_job(ctx, cmd, **kwargs): replace=True, job_attrs={ StatusField.JOB_ID: out, - StatusField.HARDWARE: HardwareOption.EAGLE, + StatusField.HARDWARE: hardware_option, StatusField.QOS: kwargs.get("qos") or QOSOption.UNSPECIFIED, StatusField.JOB_STATUS: StatusOption.SUBMITTED, StatusField.TIME_SUBMITTED: dt.datetime.now().strftime(DT_FMT), diff --git a/gaps/legacy.py b/gaps/legacy.py index c4be6613..bbeaa731 100644 --- a/gaps/legacy.py +++ b/gaps/legacy.py @@ -152,6 +152,34 @@ def make_job_file(status_dir, module, job_name, attrs): attrs=attrs, ) + @staticmethod + def _update_job_status_from_hardware(job_data, hardware_status_retriever): + """Update job status from HPC hardware if needed.""" + + # init defaults in case job/command not in status file yet + job_status = job_data.get(gaps.status.StatusField.JOB_STATUS, None) + job_id = job_data.get(gaps.status.StatusField.JOB_ID, None) + job_hardware = job_data.get(gaps.status.StatusField.HARDWARE, None) + + # get job status from hardware + current = hardware_status_retriever[job_id, job_hardware] + + # No current status and job was not successful: failed! + if ( + current is None + and job_status != gaps.status.StatusOption.SUCCESSFUL + ): + job_data[ + gaps.status.StatusField.JOB_STATUS + ] = gaps.status.StatusOption.FAILED + + # do not overwrite a successful or failed job status. 
+ elif ( + current != job_status + and job_status not in gaps.status.StatusOption.members_as_str() + ): + job_data[gaps.status.StatusField.JOB_STATUS] = current + # pylint: disable=no-member,invalid-name,super-init-not-called class Pipeline(gaps.pipeline.Pipeline): diff --git a/gaps/pipeline.py b/gaps/pipeline.py index 6fb58b4f..e8c59c9e 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -142,7 +142,7 @@ def _status(self, step): status = self.status submitted = _check_jobs_submitted(status, command) if not submitted: - return StatusOption.RUNNING + return StatusOption.NOT_SUBMITTED return self._get_command_return_code(status, command) diff --git a/gaps/status.py b/gaps/status.py index 8f0bbc89..bc08febe 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -214,7 +214,7 @@ def as_df(self, commands=None, index_name="job_name", include_cols=None): include_cols = include_cols or [] output_cols = self._DF_COLUMNS + list(include_cols) - self.update_from_all_job_files(purge=False) + self.update_from_all_job_files(purge=False, check_hardware=True) if not self.data: return pd.DataFrame(columns=output_cols) @@ -235,6 +235,7 @@ def as_df(self, commands=None, index_name="job_name", include_cols=None): continue command_df[f"{StatusField.PIPELINE_INDEX}"] = command_index commands.append(command_df) + try: command_df = pd.concat(commands, sort=False) except ValueError: @@ -274,7 +275,7 @@ def dump(self): backup.unlink(missing_ok=True) - def update_from_all_job_files(self, purge=True): + def update_from_all_job_files(self, check_hardware=False, purge=True): """Update status from all single-job job status files. This method loads all single-job status files in the target @@ -283,9 +284,14 @@ def update_from_all_job_files(self, purge=True): Parameters ---------- + check_hardware : bool, optional + Option to check hardware status for job failures for jobs + with a "running" status. This is useful because the + "running" status may not be correctly updated if the job + terminates abnormally on the HPC. By default, `False`. purge : bool, optional Option to purge the individual status files. - By default,`True`. + By default, `True`. Returns ------- @@ -302,11 +308,48 @@ def update_from_all_job_files(self, purge=True): monitor_pid_info = _safe_load(monitor_pid_file, purge=purge) self.data.update(monitor_pid_info) + if check_hardware: + self._update_from_hardware() + if purge: self.dump() return self + def _update_from_hardware(self): + """Check all job status against hardware status.""" + hardware_status_retriever = HardwareStatusRetriever() + for job_data in self._job_statuses(): + self._update_job_status_from_hardware( + job_data, hardware_status_retriever + ) + + def _job_statuses(self): + """Iterate over job status dicts. Ignore other info in self.data""" + for status in self.values(): + try: + yield from _iter_job_status(status) + except AttributeError: + continue + + @staticmethod + def _update_job_status_from_hardware(job_data, hardware_status_retriever): + """Update job status to failed if it is processing but DNE on HPC.""" + + if not StatusOption( + job_data.get(StatusField.JOB_STATUS, StatusOption.NOT_SUBMITTED) + ).is_processing: + return + + job_id = job_data.get(StatusField.JOB_ID, None) + job_hardware = job_data.get(StatusField.HARDWARE, None) + + # get job status from hardware + current = hardware_status_retriever[job_id, job_hardware] + # No current status and job was not successful: failed! 
+ if current is None: + job_data[StatusField.JOB_STATUS] = StatusOption.FAILED + def update_job_status( self, command, job_name, hardware_status_retriever=None ): @@ -334,37 +377,20 @@ def update_job_status( # Update status data dict and file if job file was found if current is not None: self.data = recursively_update_dict(self.data, current) - self.dump() # check job status via hardware if job file not found. elif command in self.data: # job exists if job_name in self.data[command]: - job_data = self.data[command][job_name] - - # init defaults in case job/command not in status file yet - previous = job_data.get(StatusField.JOB_STATUS, None) - job_id = job_data.get(StatusField.JOB_ID, None) - job_hardware = job_data.get(StatusField.HARDWARE, None) - - # get job status from hardware - current = hardware_status_retriever[job_id, job_hardware] - - # No current status and job was not successful: failed! - if current is None and previous != StatusOption.SUCCESSFUL: - job_data[StatusField.JOB_STATUS] = StatusOption.FAILED - - # do not overwrite a successful or failed job status. - elif ( - current != previous - and previous not in StatusOption.members_as_str() - ): - job_data[StatusField.JOB_STATUS] = current - + self._update_job_status_from_hardware( + self.data[command][job_name], hardware_status_retriever + ) # job does not yet exist else: self.data[command][job_name] = {} + self.dump() + def _retrieve_job_status( self, command, job_name, hardware_status_retriever ): @@ -802,3 +828,11 @@ def _validate_hardware(hardware): f"Available options are: {HardwareOption.members_as_str()}." ) raise gapsKeyError(msg) from err + + +def _iter_job_status(status): + """Iterate over job status dictionary.""" + for job_status in status.values(): + if not isinstance(job_status, dict): + continue + yield job_status diff --git a/tests/cli/test_cli_config.py b/tests/cli/test_cli_config.py index f0718084..a39ff673 100644 --- a/tests/cli/test_cli_config.py +++ b/tests/cli/test_cli_config.py @@ -201,6 +201,20 @@ def runnable_script(): _CMD_LIST.pop(0) +@pytest.fixture +def job_names_cache(monkeypatch): + """Monkeypatch `_kickoff_hpc_job` and cache call""" + cache = {} + + def _test_kickoff(ctx, cmd, __, **kwargs): + cache[ctx.obj["NAME"]] = cmd + + monkeypatch.setattr( + gaps.cli.execution, "_kickoff_hpc_job", _test_kickoff, raising=True + ) + return cache + + @pytest.mark.parametrize( ("extra_input", "none_list"), [ @@ -310,7 +324,7 @@ def pre_processing(config, a_value, a_multiplier): @pytest.mark.parametrize("test_class", [False, True]) def test_run_multiple_nodes( - test_ctx, runnable_script, monkeypatch, test_class + test_ctx, runnable_script, monkeypatch, test_class, job_names_cache ): """Test the `run` function calls `_kickoff_hpc_job` for multiple nodes.""" @@ -349,15 +363,6 @@ def test_run_multiple_nodes( with open(config_fp, "w") as config_file: json.dump(config, config_file) - job_names_cache = {} - - def _test_kickoff(ctx, cmd, **kwargs): - job_names_cache[ctx.obj["NAME"]] = cmd - - monkeypatch.setattr( - gaps.cli.execution, "_kickoff_hpc_job", _test_kickoff, raising=True - ) - assert len(job_names_cache) == 0 from_config(config_fp, command_config) assert len(job_names_cache) == 4 @@ -375,7 +380,9 @@ def _test_kickoff(ctx, cmd, **kwargs): @pytest.mark.parametrize("test_class", [False, True]) -def test_run_no_split_keys(test_ctx, runnable_script, monkeypatch, test_class): +def test_run_no_split_keys( + test_ctx, runnable_script, monkeypatch, test_class, job_names_cache +): """Test the `run` function with 
no split keys specified.""" tmp_path = test_ctx.obj["TMP_PATH"] @@ -412,15 +419,6 @@ def test_run_no_split_keys(test_ctx, runnable_script, monkeypatch, test_class): with open(config_fp, "w") as config_file: json.dump(config, config_file) - job_names_cache = {} - - def _test_kickoff(ctx, cmd, **kwargs): - job_names_cache[ctx.obj["NAME"]] = cmd - - monkeypatch.setattr( - gaps.cli.execution, "_kickoff_hpc_job", _test_kickoff, raising=True - ) - assert len(job_names_cache) == 0 from_config(config_fp, command_config) assert len(job_names_cache) == 1 @@ -434,7 +432,7 @@ def _test_kickoff(ctx, cmd, **kwargs): @pytest.mark.parametrize("test_class", [False, True]) def test_run_empty_split_keys( - test_ctx, runnable_script, monkeypatch, test_class + test_ctx, runnable_script, monkeypatch, test_class, job_names_cache ): """Test the `run` function with empty split keys input.""" @@ -472,15 +470,6 @@ def test_run_empty_split_keys( with open(config_fp, "w") as config_file: json.dump(config, config_file) - job_names_cache = {} - - def _test_kickoff(ctx, cmd, **kwargs): - job_names_cache[ctx.obj["NAME"]] = cmd - - monkeypatch.setattr( - gaps.cli.execution, "_kickoff_hpc_job", _test_kickoff, raising=True - ) - assert len(job_names_cache) == 0 from_config(config_fp, command_config) assert len(job_names_cache) == 1 @@ -494,8 +483,16 @@ def _test_kickoff(ctx, cmd, **kwargs): @pytest.mark.parametrize("test_class", [False, True]) @pytest.mark.parametrize("num_nodes", [30, 100]) +@pytest.mark.parametrize("option", ["eagle", "dne"]) def test_warning_about_au_usage( - test_ctx, runnable_script, monkeypatch, test_class, caplog, num_nodes + test_ctx, + runnable_script, + monkeypatch, + test_class, + caplog, + num_nodes, + option, + job_names_cache, ): """Test the `run` function calls `_kickoff_hpc_job` for multiple nodes.""" @@ -517,7 +514,7 @@ def test_warning_about_au_usage( config = { "execution_control": { - "option": "eagle", + "option": option, "allocation": "test", "qos": "high", "walltime": 50, @@ -532,25 +529,21 @@ def test_warning_about_au_usage( with open(config_fp, "w") as config_file: json.dump(config, config_file) - job_names_cache = {} - - def _test_kickoff(ctx, cmd, **kwargs): - job_names_cache[ctx.obj["NAME"]] = cmd - - monkeypatch.setattr( - gaps.cli.execution, "_kickoff_hpc_job", _test_kickoff, raising=True - ) - assert not caplog.records assert len(job_names_cache) == 0 - from_config(config_fp, command_config) - assert len(job_names_cache) == num_nodes - assert len(set(job_names_cache)) == num_nodes + try: + from_config(config_fp, command_config) + except ValueError: + pass + + if option == "eagle": + assert len(job_names_cache) == num_nodes + assert len(set(job_names_cache)) == num_nodes warnings = [ record for record in caplog.records if record.levelname == "WARNING" ] - if num_nodes < 33: + if num_nodes < 33 or option != "eagle": assert not warnings else: assert warnings @@ -562,7 +555,7 @@ def _test_kickoff(ctx, cmd, **kwargs): @pytest.mark.parametrize("test_class", [False, True]) def test_run_parallel_split_keys( - test_ctx, runnable_script, monkeypatch, test_class + test_ctx, runnable_script, monkeypatch, test_class, job_names_cache ): """Test the `run` function calls `_kickoff_hpc_job` for multiple nodes.""" @@ -599,15 +592,6 @@ def test_run_parallel_split_keys( with open(config_fp, "w") as config_file: json.dump(config, config_file) - job_names_cache = {} - - def _test_kickoff(ctx, cmd, **kwargs): - job_names_cache[ctx.obj["NAME"]] = cmd - - monkeypatch.setattr( - gaps.cli.execution, 
"_kickoff_hpc_job", _test_kickoff, raising=True - ) - assert len(job_names_cache) == 0 from_config(config_fp, command_config) assert len(job_names_cache) == 6 diff --git a/tests/cli/test_cli_execution.py b/tests/cli/test_cli_execution.py index 06393709..aa95d396 100644 --- a/tests/cli/test_cli_execution.py +++ b/tests/cli/test_cli_execution.py @@ -9,7 +9,7 @@ import pytest import gaps.hpc -from gaps.status import Status, StatusField, StatusOption +from gaps.status import Status, StatusField, StatusOption, HardwareOption from gaps.cli.execution import kickoff_job, _should_run from gaps.exceptions import gapsConfigError @@ -124,6 +124,7 @@ def test_kickoff_job_hpc( ): """Test kickoff command for HPC job.""" + test_ctx.obj.pop("MANAGER", None) run_dir = test_ctx.obj["TMP_PATH"] assert not list(run_dir.glob("*")) job_name = "_".join([test_ctx.obj["NAME"], str(high_qos)]) @@ -154,8 +155,9 @@ def _test_submit(cmd): assert not list(test_ctx.obj["TMP_PATH"].glob("*")) kickoff_job(test_ctx, cmd, exec_kwargs) + test_ctx.obj.pop("MANAGER", None) - assert cmd_cache + assert len(cmd_cache) == 2 assert_message_was_logged( "Found extra keys in 'execution_control'! ", "WARNING" ) @@ -183,11 +185,84 @@ def _test_submit(cmd): exec_kwargs = {"option": "eagle", "allocation": "test", "walltime": 0.43} kickoff_job(test_ctx, cmd, exec_kwargs) assert_message_was_logged("not resubmitting", "INFO") + assert len(cmd_cache) == 2 + + Status.make_single_job_file( + run_dir, + "run", + job_name, + {StatusField.JOB_STATUS: StatusOption.RUNNING}, + ) + kickoff_job(test_ctx, cmd, exec_kwargs) + assert len(cmd_cache) == 2 + + # check repeated call does not requeue HPC + kickoff_job(test_ctx, cmd, exec_kwargs) + assert len(cmd_cache) == 2 exec_kwargs = {"option": "eagle", "walltime": 0.43} with pytest.raises(gapsConfigError): kickoff_job(test_ctx, cmd, exec_kwargs) + HardwareOption.EAGLE.manager = gaps.hpc.SLURM() + test_ctx.obj.pop("MANAGER", None) + + +def test_qos_values(test_ctx, monkeypatch): + """Test kickoff command for HPC job.""" + + test_ctx.obj.pop("MANAGER", None) + run_dir = test_ctx.obj["TMP_PATH"] + assert not list(run_dir.glob("*")) + + exec_kwargs = { + "option": "eagle", + "dne_arg": 0, + "allocation": "test", + "walltime": 0.43, + "stdout_path": (test_ctx.obj["TMP_PATH"] / "stdout").as_posix(), + "qos": "dne", + } + + cmd = ( + "python -c \"import warnings; print('hello world'); " + "warnings.warn('a test warning')\"" + ) + cmd_cache = [] + + def _test_submit(cmd): + cmd_cache.append(cmd) + return "9999", None + + monkeypatch.setattr(gaps.hpc, "submit", _test_submit, raising=True) + monkeypatch.setattr( + gaps.hpc.PBS, + "_job_is_running", + lambda *__, **___: True, + raising=True, + ) + assert not cmd_cache + assert not list(test_ctx.obj["TMP_PATH"].glob("*")) + + with pytest.raises(gapsConfigError): + kickoff_job(test_ctx, cmd, exec_kwargs) + + exec_kwargs["option"] = "peregrine" + kickoff_job(test_ctx, cmd, exec_kwargs) + + status_file = list(run_dir.glob("*.json")) + assert len(status_file) == 1 + status_file = status_file[0] + assert status_file.name.endswith(".json") + + with open(status_file, "r") as status_fh: + status = json.load(status_fh) + + assert status["run"][test_ctx.obj["NAME"]][StatusField.QOS] == "dne" + + HardwareOption.EAGLE.manager = gaps.hpc.SLURM() + test_ctx.obj.pop("MANAGER", None) + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/cli/test_cli_status.py b/tests/cli/test_cli_status.py index f40ef643..5ba85519 100644 
--- a/tests/cli/test_cli_status.py +++ b/tests/cli/test_cli_status.py @@ -7,6 +7,7 @@ import psutil import pytest +from gaps.status import HardwareStatusRetriever, StatusOption from gaps.cli.status import status_command from gaps.warnings import gapsWarning @@ -37,6 +38,12 @@ def test_status(test_data_dir, cli_runner, extra_args, monkeypatch): """Test the status command.""" monkeypatch.setattr(psutil, "pid_exists", lambda *__: True, raising=True) + monkeypatch.setattr( + HardwareStatusRetriever, + "__getitem__", + lambda *__, **___: StatusOption.SUBMITTED, + raising=True, + ) status = status_command() if "dne" in extra_args: @@ -86,5 +93,82 @@ def test_status(test_data_dir, cli_runner, extra_args, monkeypatch): assert lines[-4] == "Total AUs spent: 6" +@pytest.mark.parametrize( + "extra_args", + [ + [], + "-i out_dir".split(), + "-c run".split() + "-c collect-run".split(), + "-s successful".split() + + "-s fail".split() + + "-s r".split() + + "-s pending".split() + + "-s u".split() + + "-s dne".split(), + "-c run".split() + + "-c collect-run".split() + + "-s successful".split() + + "-s fail".split() + + "-s r".split() + + "-s pending".split() + + "-s u".split() + + "-s dne".split(), + ], +) +def test_status_with_hardware_check( + test_data_dir, cli_runner, extra_args, monkeypatch +): + """Test the status command.""" + + monkeypatch.setattr(psutil, "pid_exists", lambda *__: True, raising=True) + + status = status_command() + if "dne" in extra_args: + with pytest.warns(gapsWarning): + result = cli_runner.invoke( + status, + [(test_data_dir / "test_run").as_posix()] + extra_args, + ) + else: + result = cli_runner.invoke( + status, + [(test_data_dir / "test_run").as_posix()] + extra_args, + ) + lines = result.stdout.split("\n") + cols = [ + "job_status", + "time_submitted", + "time_start", + "time_end", + "total_runtime", + "hardware", + "qos", + ] + if "out_dir" in extra_args: + cols += ["out_dir"] + cols = " ".join(cols) + + expected_partial_lines = [ + "test_run", + "MONITOR PID: 1234", + cols, + "--", + "gaps_test_run_j0 successful 0:03:38 local", + "gaps_test_run_j1 failed 0:01:05 eagle high", + "gaps_test_run_j2 failed local unspecified", + "gaps_test_run_j3 failed local", + "collect-run not submitted", + ] + + for ind, partial in enumerate(expected_partial_lines): + assert all( + string in lines[-16 + ind] for string in partial.split() + ), partial + + assert "Total Runtime" in lines[-6] + assert "Total project time" in lines[-5] + assert lines[-4] == "Total AUs spent: 6" + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/test_hpc.py b/tests/test_hpc.py index 2209fd38..a88b07c4 100644 --- a/tests/test_hpc.py +++ b/tests/test_hpc.py @@ -167,8 +167,28 @@ def test_check_job(manager, q_str): @pytest.mark.parametrize( - ("manager", "q_str", "kwargs", "expectation"), + ("manager", "q_str", "kwargs", "expectation", "add_qos"), [ + ( + PBS, + Q_STAT, + {"queue": "batch-h", "sh_script": "echo Hello!"}, + { + "#PBS -N submit_test": 1, + "#PBS -A rev": 2, + "#PBS -q batch-h": 3, + # cspell:disable-next-line + "#PBS -o ./stdout/submit_test_$PBS_JOBID.o": 4, + # cspell:disable-next-line + "#PBS -e ./stdout/submit_test_$PBS_JOBID.e": 5, + "#PBS -l walltime=00:26:00,qos=high": 6, + # cspell:disable-next-line + "echo Running on: $HOSTNAME, Machine Type: $MACHTYPE": 7, + "echo Running python in directory `which python`": 8, + "echo Hello!": 9, + }, + True, + ), ( PBS, Q_STAT, @@ -187,6 +207,7 @@ def test_check_job(manager, q_str): "echo 
Running python in directory `which python`": 8, "echo Hello!": 9, }, + False, ), ( SLURM, @@ -199,16 +220,17 @@ def test_check_job(manager, q_str): "#SBATCH --nodes=1": 4, "#SBATCH --output=./stdout/submit_test_%j.o": 5, "#SBATCH --error=./stdout/submit_test_%j.e": 6, - "#SBATCH --qos=normal": 7, + "#SBATCH --qos=high": 7, # cspell:disable-next-line "echo Running on: $HOSTNAME, Machine Type: $MACHTYPE": 8, "echo Running python in directory `which python`": 9, "echo Hello!": 10, }, + True, ), ], ) -def test_hpc_submit(manager, q_str, kwargs, expectation, monkeypatch): +def test_hpc_submit(manager, q_str, kwargs, expectation, add_qos, monkeypatch): """Test the HPC job submission utility""" queue_dict = manager.parse_queue_str(q_str) @@ -216,6 +238,8 @@ def test_hpc_submit(manager, q_str, kwargs, expectation, monkeypatch): kwargs["cmd"] = "python -c \"print('hello world')\"" kwargs["allocation"] = "rev" kwargs["walltime"] = 0.43 + if add_qos: + kwargs["qos"] = "high" out, err = hpc_manager.submit("job1", **kwargs) assert out is None assert err == "already_running" diff --git a/tests/test_legacy.py b/tests/test_legacy.py index fc2eb51a..1e1a0be4 100644 --- a/tests/test_legacy.py +++ b/tests/test_legacy.py @@ -17,6 +17,7 @@ StatusField, StatusOption, ) +import gaps.status from gaps.legacy import ( HardwareStatusRetriever, Status, @@ -143,7 +144,7 @@ def test_make_job_file_and_retrieve_job_status(temp_job_dir, job_name): assert expected_fn not in [f.name for f in tmp_path.glob("*")] assert status_fp in tmp_path.glob("*") - assert status == "R", "Failed, status is {status!r}" + assert status == "R", f"Failed, status is {status!r}" status = Status.retrieve_job_status( status_dir=tmp_path, module="generation", job_name="test1.h5" @@ -175,6 +176,37 @@ def test_make_job_file_and_retrieve_job_status(temp_job_dir, job_name): ) +def test_update_job_status(tmp_path, monkeypatch): + """Test updating job status""" + status = Status(tmp_path) + status.data = { + "run": { + StatusField.PIPELINE_INDEX: 0, + "test0": {StatusField.JOB_STATUS: "R"}, + "test1": {StatusField.JOB_STATUS: StatusOption.SUCCESSFUL}, + } + } + + monkeypatch.setattr( + gaps.status.HardwareStatusRetriever, + "__getitem__", + lambda *__, **___: "successful", + raising=True, + ) + + status.update_job_status("run", "test0") + assert ( + status.data["run"]["test0"][StatusField.JOB_STATUS] + == StatusOption.SUCCESSFUL + ) + + status.update_job_status("run", "test1") + assert ( + status.data["run"]["test1"][StatusField.JOB_STATUS] + == StatusOption.SUCCESSFUL + ) + + def test_add_job(temp_job_dir): """Test job addition and exist check""" tmp_path, status_fp = temp_job_dir diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 1e24112f..02ab0bdc 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -336,7 +336,7 @@ def test_pipeline_get_command_return_code( def test_pipeline_status(sample_pipeline_config, monkeypatch): """Test the `_status` function.""" pipeline = Pipeline(sample_pipeline_config) - assert pipeline._status(0) == StatusOption.RUNNING + assert pipeline._status(0) == StatusOption.NOT_SUBMITTED monkeypatch.setattr( HardwareStatusRetriever, "__getitem__", diff --git a/tests/test_status.py b/tests/test_status.py index 244f3309..06e89029 100644 --- a/tests/test_status.py +++ b/tests/test_status.py @@ -33,7 +33,7 @@ TEST_1_ATTRS_1 = { "job_name": "test1", - StatusField.JOB_STATUS: "R", + StatusField.JOB_STATUS: StatusOption.RUNNING, "run_id": 1234, } TEST_1_ATTRS_2 = { @@ -42,11 +42,11 @@ } TEST_2_ATTRS_1 = { 
"job_name": "test2", - StatusField.JOB_STATUS: "R", + StatusField.JOB_STATUS: StatusOption.RUNNING, } TEST_2_ATTRS_2 = { "job_name": "test2", - StatusField.JOB_STATUS: "R", + StatusField.JOB_STATUS: StatusOption.RUNNING, StatusField.JOB_ID: 123, } @@ -257,7 +257,7 @@ def test_make_file(temp_job_dir, job_name): assert expected_fn not in [f.name for f in tmp_path.glob("*")] assert status_fp in tmp_path.glob("*") - assert status == "R", "Failed, status is {status!r}" + assert status == StatusOption.RUNNING, f"Failed, status is {status!r}" status = Status.retrieve_job_status(tmp_path, "generation", "test1.h5") assert expected_fn not in [f.name for f in tmp_path.glob("*")] @@ -299,6 +299,9 @@ def test_update_job_status(tmp_path, monkeypatch): status.update_job_status("run", job_name) assert status.data["run"].get(job_name) == {} + tmp_path = tmp_path / "test" + tmp_path.mkdir() + Status.make_single_job_file( tmp_path, "generation", "test1", TEST_1_ATTRS_1 ) @@ -331,33 +334,66 @@ def test_update_job_status(tmp_path, monkeypatch): ) new_attrs = { "job_name": "test1", - StatusField.JOB_STATUS: "some status", + StatusField.JOB_STATUS: StatusOption.SUBMITTED, StatusField.HARDWARE: "local", } Status.make_single_job_file(tmp_path, "generation", "test1", new_attrs) status.update_job_status("generation", "test1") assert ( status.data["generation"]["test1"][StatusField.JOB_STATUS] - == "some status" + == StatusOption.SUBMITTED ) monkeypatch.setattr( HardwareStatusRetriever, "__getitem__", - lambda *__, **___: StatusOption.RUNNING, + lambda *__, **___: StatusOption.SUBMITTED, raising=True, ) status.update_job_status("generation", "test1") assert ( status.data["generation"]["test1"][StatusField.JOB_STATUS] - == StatusOption.RUNNING + == StatusOption.SUBMITTED ) # test a repeated call to hardware status.update_job_status("generation", "test1") assert ( status.data["generation"]["test1"][StatusField.JOB_STATUS] - == StatusOption.RUNNING + == StatusOption.SUBMITTED + ) + + +def test_update_job_status_with_hardware(tmp_path, monkeypatch): + """Test status update with call to hardware""" + monkeypatch.setattr( + HardwareStatusRetriever, + "__getitem__", + lambda *__, **___: None, + raising=True, + ) + + Status.make_single_job_file( + tmp_path, + "generation", + "test1", + {StatusField.JOB_STATUS: StatusOption.SUBMITTED}, + ) + Status.make_single_job_file( + tmp_path, + "generation", + "test2", + {StatusField.JOB_STATUS: StatusOption.COMPLETE}, + ) + + status = Status(tmp_path).update_from_all_job_files(check_hardware=True) + assert ( + status["generation"]["test1"][StatusField.JOB_STATUS] + == StatusOption.FAILED + ) + assert ( + status["generation"]["test2"][StatusField.JOB_STATUS] + == StatusOption.COMPLETE ) From 902f202852a714034c83df959dfb7c13af3510d4 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 5 Aug 2023 15:23:46 -0600 Subject: [PATCH 32/61] Updated message text --- gaps/cli/status.py | 2 +- tests/cli/test_cli_status.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gaps/cli/status.py b/gaps/cli/status.py index 1f7f8eb0..9e4aaf97 100644 --- a/gaps/cli/status.py +++ b/gaps/cli/status.py @@ -147,7 +147,7 @@ def color_string(string): print(f"{Style.BRIGHT}{runtime_str}{Style.RESET_ALL}") if walltime > 2: walltime_str = ( - f"Total project time (including queue): " + f"Total project wall time (including queue and downtime): " f"{_elapsed_time_as_str(walltime)}" ) print(f"{Style.BRIGHT}{walltime_str}{Style.RESET_ALL}") diff --git a/tests/cli/test_cli_status.py 
b/tests/cli/test_cli_status.py index 5ba85519..d2864df2 100644 --- a/tests/cli/test_cli_status.py +++ b/tests/cli/test_cli_status.py @@ -89,7 +89,7 @@ def test_status(test_data_dir, cli_runner, extra_args, monkeypatch): ), partial assert "Total Runtime" in lines[-6] - assert "Total project time" in lines[-5] + assert "Total project wall time" in lines[-5] assert lines[-4] == "Total AUs spent: 6" @@ -166,7 +166,7 @@ def test_status_with_hardware_check( ), partial assert "Total Runtime" in lines[-6] - assert "Total project time" in lines[-5] + assert "Total project wall time" in lines[-5] assert lines[-4] == "Total AUs spent: 6" From 80d23e278fe7bb44448f4be7abf50fd2fc7cdab5 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 5 Aug 2023 15:30:53 -0600 Subject: [PATCH 33/61] Runtime not added to status monitor if job fails unexpectedly --- gaps/status.py | 9 +++++---- tests/cli/test_cli_status.py | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/gaps/status.py b/gaps/status.py index bc08febe..2278189b 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -750,10 +750,11 @@ def _elapsed_time_as_str(seconds_elapsed): def _add_elapsed_time(status_df): """Add elapsed time to status DataFrame""" - mask = ( - ~status_df[StatusField.TIME_START].isna() - & status_df[StatusField.TIME_END].isna() - ) + has_start_time = ~status_df[StatusField.TIME_START].isna() + has_no_end_time = status_df[StatusField.TIME_END].isna() + has_not_failed = status_df[StatusField.JOB_STATUS] != StatusOption.FAILED + mask = has_start_time & (has_no_end_time & has_not_failed) + start_times = status_df.loc[mask, StatusField.TIME_START] start_times = pd.to_datetime(start_times, format=DT_FMT) elapsed_times = dt.datetime.now() - start_times diff --git a/tests/cli/test_cli_status.py b/tests/cli/test_cli_status.py index d2864df2..97fc200f 100644 --- a/tests/cli/test_cli_status.py +++ b/tests/cli/test_cli_status.py @@ -164,6 +164,8 @@ def test_status_with_hardware_check( assert all( string in lines[-16 + ind] for string in partial.split() ), partial + if "failed" in lines[-16 + ind]: + assert not "(r)" in lines[-16 + ind] assert "Total Runtime" in lines[-6] assert "Total project wall time" in lines[-5] From 81ea8c5094903c8b0b22f6623fc4f5a5e5121fb2 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 12:04:36 -0600 Subject: [PATCH 34/61] Failed jobs default to 0 runtime --- gaps/cli/status.py | 25 +++-- tests/cli/test_cli_status.py | 103 ++++++++++++++++++ .../jobstatus_gaps_test_failed_run_node0.json | 11 ++ .../jobstatus_gaps_test_failed_run_node1.json | 10 ++ .../jobstatus_gaps_test_failed_run_node2.json | 10 ++ .../test_failed_run_status.json | 26 +++++ 6 files changed, 176 insertions(+), 9 deletions(-) create mode 100644 tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node0.json create mode 100644 tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node1.json create mode 100644 tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node2.json create mode 100644 tests/data/test_failed_run/test_failed_run_status.json diff --git a/gaps/cli/status.py b/gaps/cli/status.py index 9e4aaf97..83f3d896 100644 --- a/gaps/cli/status.py +++ b/gaps/cli/status.py @@ -211,16 +211,23 @@ def main_monitor(folder, commands, status, include): df.total_runtime_seconds = run_times_seconds.sum() df.total_aus_used = aus_used.sum() - start_time = df[StatusField.TIME_SUBMITTED].fillna( - dt.datetime.now().strftime(DT_FMT) - ) - start_time = pd.to_datetime(start_time, format=DT_FMT).min() + all_jobs_failed = ( + 
df[StatusField.JOB_STATUS] == StatusOption.FAILED + ).all() + all_end_times_missing = df[StatusField.TIME_END].isna().all() + if all_jobs_failed and all_end_times_missing: + df.walltime = 0 + else: + start_time = df[StatusField.TIME_SUBMITTED].fillna( + dt.datetime.now().strftime(DT_FMT) + ) + start_time = pd.to_datetime(start_time, format=DT_FMT).min() - end_time = df[StatusField.TIME_END].fillna( - dt.datetime.now().strftime(DT_FMT) - ) - end_time = pd.to_datetime(end_time, format=DT_FMT).max() - df.walltime = (end_time - start_time).total_seconds() + end_time = df[StatusField.TIME_END].fillna( + dt.datetime.now().strftime(DT_FMT) + ) + end_time = pd.to_datetime(end_time, format=DT_FMT).max() + df.walltime = (end_time - start_time).total_seconds() _color_print(df, directory.name, commands, status) diff --git a/tests/cli/test_cli_status.py b/tests/cli/test_cli_status.py index 97fc200f..e0eb81d9 100644 --- a/tests/cli/test_cli_status.py +++ b/tests/cli/test_cli_status.py @@ -2,6 +2,8 @@ """ GAPs status command tests. """ +import json +import shutil from pathlib import Path import psutil @@ -172,5 +174,106 @@ def test_status_with_hardware_check( assert lines[-4] == "Total AUs spent: 6" +@pytest.mark.parametrize( + "extra_args", + [ + [], + "-i out_dir".split(), + "-c run".split() + "-c collect-run".split(), + "-s successful".split() + + "-s fail".split() + + "-s r".split() + + "-s pending".split() + + "-s u".split() + + "-s dne".split(), + "-c run".split() + + "-c collect-run".split() + + "-s successful".split() + + "-s fail".split() + + "-s r".split() + + "-s pending".split() + + "-s u".split() + + "-s dne".split(), + ], +) +@pytest.mark.parametrize("single_command", [True, False]) +def test_failed_run( + tmp_path, + test_data_dir, + cli_runner, + extra_args, + monkeypatch, + single_command, +): + """Test the status command.""" + + monkeypatch.setattr(psutil, "pid_exists", lambda *__: True, raising=True) + + status = status_command() + if single_command: + shutil.copytree( + test_data_dir / "test_failed_run", tmp_path / "test_failed_run" + ) + run_dir = (tmp_path / "test_failed_run").as_posix() + pipe_json = Path(run_dir) / "test_failed_run_status.json" + with open(pipe_json, "r") as config_file: + config = json.load(config_file) + config.pop("collect-run") + with open(pipe_json, "w") as config_file: + json.dump(config, config_file) + else: + run_dir = (test_data_dir / "test_failed_run").as_posix() + + if "dne" in extra_args: + with pytest.warns(gapsWarning): + result = cli_runner.invoke(status, [run_dir] + extra_args) + else: + result = cli_runner.invoke(status, [run_dir] + extra_args) + + lines = result.stdout.split("\n") + cols = [ + "job_status", + "time_submitted", + "time_start", + "time_end", + "total_runtime", + "hardware", + "qos", + ] + if "out_dir" in extra_args: + cols += ["out_dir"] + cols = " ".join(cols) + + expected_partial_lines = [ + "test_failed_run", + "MONITOR PID: 1234", + cols, + "--", + "gaps_test_failed_run_j0 failed local", + "gaps_test_failed_run_j1 failed local high", + "gaps_test_failed_run_j2 failed local unspecified", + ] + if single_command: + start_ind = -12 + else: + expected_partial_lines += ["collect-run not submitted"] + start_ind = -14 + + for ind, partial in enumerate(expected_partial_lines): + assert all( + string in lines[start_ind + ind] for string in partial.split() + ), partial + assert not "(r)" in lines[start_ind + ind] + assert "Total AUs spent" not in lines[start_ind + ind] + if single_command: + assert "Total project wall time" not in 
lines[start_ind + 10] + + if single_command: + assert "Total Runtime: 0:00:00" in lines[-4] + else: + assert "Total Runtime: 0:00:00" in lines[-5] + assert "Total project wall time" in lines[-4] + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node0.json b/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node0.json new file mode 100644 index 00000000..b0e9ff67 --- /dev/null +++ b/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node0.json @@ -0,0 +1,11 @@ +{ + "run": { + "gaps_test_failed_run_j0": { + "job_status": "submitted", + "out_dir": "/a/test/path", + "out_file": null, + "time_start": "07-Dec-2022 14:21:38", + "hardware": "local" + } + } +} \ No newline at end of file diff --git a/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node1.json b/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node1.json new file mode 100644 index 00000000..a3f98b33 --- /dev/null +++ b/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node1.json @@ -0,0 +1,10 @@ +{ + "run": { + "gaps_test_failed_run_j1": { + "job_status": "failed", + "out_dir": "/a/test/path", + "time_start": "07-Dec-2022 14:25:21", + "qos": "high" + } + } +} \ No newline at end of file diff --git a/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node2.json b/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node2.json new file mode 100644 index 00000000..049a03d5 --- /dev/null +++ b/tests/data/test_failed_run/jobstatus_gaps_test_failed_run_node2.json @@ -0,0 +1,10 @@ +{ + "run": { + "gaps_test_failed_run_j2": { + "job_status": "running", + "out_dir": "/a/test/path", + "time_start": "07-Dec-2022 14:26:17", + "qos": "unspecified" + } + } +} \ No newline at end of file diff --git a/tests/data/test_failed_run/test_failed_run_status.json b/tests/data/test_failed_run/test_failed_run_status.json new file mode 100644 index 00000000..5af608d0 --- /dev/null +++ b/tests/data/test_failed_run/test_failed_run_status.json @@ -0,0 +1,26 @@ +{ + "another_input": ["hello world"], + "a_dict_input": {"a_value": 42}, + "monitor_pid": 1234, + "run": { + "pipeline_index": 0, + "gaps_test_failed_run_j0": { + "job_status": "submitted", + "hardware": "local", + "time_submitted": "07-Dec-2022 14:21:33" + }, + "gaps_test_failed_run_j1": { + "job_status": "submitted", + "hardware": "local", + "time_submitted": "07-Dec-2022 14:25:17" + }, + "gaps_test_failed_run_j2": { + "job_status": "submitted", + "hardware": "local", + "time_submitted": "07-Dec-2022 14:25:17" + } + }, + "collect-run": { + "pipeline_index": 1 + } +} \ No newline at end of file From 672ef5f832c05142326876b1a5a86ff17d941854 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 12:11:31 -0600 Subject: [PATCH 35/61] Updated log versions logic --- gaps/collection.py | 4 ++-- gaps/log.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gaps/collection.py b/gaps/collection.py index 813ba132..086a651f 100644 --- a/gaps/collection.py +++ b/gaps/collection.py @@ -14,7 +14,7 @@ import pandas as pd from rex import Resource, Outputs -from rex.utilities import log_versions +from gaps.log import log_versions from gaps.warnings import gapsCollectionWarning from gaps.exceptions import gapsRuntimeError from gaps.utilities import project_points_from_container_or_slice @@ -425,7 +425,7 @@ def __init__( Flag to purge output HDF5 file if it already exists. By default, `False`. 
""" - log_versions(logger) + log_versions() self.h5_out = Path(h5_file) self.collect_pattern = collect_pattern if clobber and self.h5_out.exists(): diff --git a/gaps/log.py b/gaps/log.py index ded22726..7c1b92bf 100644 --- a/gaps/log.py +++ b/gaps/log.py @@ -20,8 +20,8 @@ def log_versions(): """Log package versions.""" - gaps.logger.debug("Running with gaps version %s", gaps.__version__) - gaps.logger.debug(" - rex version %s", rex.__version__) + gaps.logger.info("Running with gaps version %s", gaps.__version__) + rex.utilities.log_versions(gaps.logger) def log_mem(log_level="DEBUG"): From 1f8af50ea2fcf69cbf8f215e1aec77726e13e59a Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 12:48:25 -0600 Subject: [PATCH 36/61] Added timestamp to monitor output --- gaps/pipeline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index e8c59c9e..2326abde 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -191,11 +191,12 @@ def _get_command_return_code(self, status, command): if return_code != StatusOption.FAILED and check_failed: fail_str = ", but some jobs have failed" logger.info( - "CLI command %r for job %r %s%s.", + "CLI command %r for job %r %s%s. (%s)", command, self.name, return_code.with_verb, # pylint: disable=no-member fail_str, + time.ctime(), ) return return_code From a28cb9057421511cb853de9ab177069a9966d0bb Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 12:57:17 -0600 Subject: [PATCH 37/61] Fix error message --- gaps/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index 2326abde..347f0c0e 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -180,7 +180,7 @@ def _get_command_return_code(self, status, command): elif job_status is None: arr.append(StatusOption.COMPLETE) else: - msg = "Job status code {job_status!r} not understood!" + msg = f"Job status code {job_status!r} not understood!" raise gapsValueError(msg) _dump_sorted(status) From dd07f7ee9193b261d788c971f5bf90d605ba25e0 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 13:32:15 -0600 Subject: [PATCH 38/61] Add `reset_query_cache` function --- gaps/hpc.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gaps/hpc.py b/gaps/hpc.py index bf0df633..63d636ea 100644 --- a/gaps/hpc.py +++ b/gaps/hpc.py @@ -102,6 +102,10 @@ def queue(self): return self._queue + def reset_query_cache(self): + """Reset the query dict cache so that hardware is queried again.""" + self._queue = None + def check_status_using_job_id(self, job_id): """Check the status of a job using the HPC queue and job ID. 
@@ -154,7 +158,7 @@ def cancel(self, arg): self.cancel(job_id) elif str(arg).lower() == "all": - self._queue = None + self.reset_query_cache() for job_id in self.queue.keys(): self.cancel(job_id) From e8c4161d365e76544714c6a8915d8429b44f1e02 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 14:23:24 -0600 Subject: [PATCH 39/61] Monitor jobs now correctly query the hardware --- gaps/pipeline.py | 5 +++-- gaps/status.py | 8 ++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index 347f0c0e..120350e2 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -8,7 +8,7 @@ from warnings import warn from gaps.hpc import SLURM -from gaps.status import Status, StatusOption, StatusField +from gaps.status import Status, StatusOption, StatusField, HardwareOption from gaps.utilities import recursively_update_dict from gaps.config import load_config, init_logging_from_config from gaps.exceptions import ( @@ -114,6 +114,7 @@ def _monitor(self, step, seconds=5, step_status=StatusOption.RUNNING): while step_status.is_processing: time.sleep(seconds) + HardwareOption.reset_all_cached_queries() step_status = self._status(step) if step_status == StatusOption.FAILED: @@ -156,7 +157,7 @@ def _get_command_return_code(self, status, command): # initialize return code array arr = [] check_failed = False - status.update_from_all_job_files() + status.update_from_all_job_files(check_hardware=True) if command not in status.data: # assume running diff --git a/gaps/status.py b/gaps/status.py index 2278189b..79b6fccb 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -80,6 +80,14 @@ def _new_post_hook(cls, obj, value): obj.charge_factor = 0 return obj + # pylint: disable=no-member + @classmethod + def reset_all_cached_queries(cls): + """Reset all cached hardware queries.""" + cls.EAGLE.manager.reset_query_cache() + cls.KESTREL.manager.reset_query_cache() + cls.PEREGRINE.manager.reset_query_cache() + class StatusOption(CaseInsensitiveEnum): """A collection of job status options.""" From a55d0fd7ce25ea92c4af66004d29047af6a937c1 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 15:27:50 -0600 Subject: [PATCH 40/61] Updated status option check to be more robust --- gaps/status.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/gaps/status.py b/gaps/status.py index 79b6fccb..db8350cd 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -344,10 +344,14 @@ def _job_statuses(self): def _update_job_status_from_hardware(job_data, hardware_status_retriever): """Update job status to failed if it is processing but DNE on HPC.""" - if not StatusOption( - job_data.get(StatusField.JOB_STATUS, StatusOption.NOT_SUBMITTED) - ).is_processing: - return + status = job_data.get( + StatusField.JOB_STATUS, StatusOption.NOT_SUBMITTED + ) + try: + if not StatusOption(status).is_processing: + return + except ValueError: + pass job_id = job_data.get(StatusField.JOB_ID, None) job_hardware = job_data.get(StatusField.HARDWARE, None) From 2198571220f09305db5c4b44def871aa651cf559 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 15:28:26 -0600 Subject: [PATCH 41/61] Do not submit pipeline if another monitor pipeline is already running --- gaps/cli/pipeline.py | 20 +++++++++++++++++++- tests/cli/test_cli_pipeline.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/gaps/cli/pipeline.py b/gaps/cli/pipeline.py index ef1dfae7..26b4857a 100644 --- a/gaps/cli/pipeline.py +++ b/gaps/cli/pipeline.py 
@@ -6,15 +6,18 @@ import sys import logging from pathlib import Path +from warnings import warn import click +import psutil from gaps import Pipeline from gaps.config import ConfigType from gaps.cli.documentation import _pipeline_command_help from gaps.cli.command import _WrappedCommand -from gaps.status import Status +from gaps.status import Status, StatusField from gaps.exceptions import gapsExecutionError +from gaps.warnings import gapsWarning logger = logging.getLogger(__name__) @@ -62,6 +65,7 @@ def pipeline(ctx, config_file, cancel, monitor, background=False): if cancel: Pipeline.cancel_all(config_file) return + if background: if not _can_run_background(): msg = ( @@ -72,6 +76,20 @@ def pipeline(ctx, config_file, cancel, monitor, background=False): ctx.obj["LOG_STREAM"] = False _kickoff_background(config_file) + project_dir = str(Path(config_file).parent.expanduser().resolve()) + status = Status(project_dir).update_from_all_job_files(purge=False) + monitor_pid = status.get(StatusField.MONITOR_PID) + if monitor_pid is not None and psutil.pid_exists(monitor_pid): + msg = ( + f"Another pipeline in {project_dir!r} is running on monitor PID: " + f"{monitor_pid}. Not starting a new pipeline execution." + ) + warn(msg, gapsWarning) + return + + if monitor: + Status.record_monitor_pid(Path(config_file).parent, os.getpid()) + Pipeline.run(config_file, monitor=monitor) diff --git a/tests/cli/test_cli_pipeline.py b/tests/cli/test_cli_pipeline.py index 027142c3..69b786cb 100644 --- a/tests/cli/test_cli_pipeline.py +++ b/tests/cli/test_cli_pipeline.py @@ -21,6 +21,7 @@ _can_run_background, ) from gaps.exceptions import gapsExecutionError +from gaps.warnings import gapsWarning TEST_FILE_DIR = Path(__file__).parent.as_posix() @@ -103,9 +104,12 @@ def test_pipeline_command( if not extra_args: cli_runner.invoke(pipe, ["-c", pipe_config_fp.as_posix()]) + else: + assert Status(tmp_path).get(StatusField.MONITOR_PID) == os.getpid() with open(target_config_fp, "r") as config: assert json.load(config) == SUCCESS_CONFIG + assert_message_was_logged("Pipeline job", "INFO") assert_message_was_logged("is complete.", "INFO") @@ -219,5 +223,30 @@ def _test_func(): assert config == expected_config +def test_pipeline_command_with_running_pid(tmp_path, cli_runner, monkeypatch): + """Assert pipeline_command does not start processing if existing pid.""" + + monkeypatch.setattr( + gaps.cli.pipeline.Status, + "get", + lambda *__, **___: os.getpid(), + raising=True, + ) + + pipe_config_fp = tmp_path / "config_pipe.json" + pipe_config_fp.touch() + + pipe = pipeline_command({}) + with pytest.warns(gapsWarning) as warn_info: + cli_runner.invoke(pipe, ["-c", pipe_config_fp.as_posix()], obj={}) + + assert "Another pipeline" in warn_info[0].message.args[0] + assert "is running on monitor PID:" in warn_info[0].message.args[0] + assert f"{os.getpid()}" in warn_info[0].message.args[0] + assert ( + "Not starting a new pipeline execution" in warn_info[0].message.args[0] + ) + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) From fe04d392cbe1f5e6b34f3547d3db9066f15a4bfd Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 15:59:22 -0600 Subject: [PATCH 42/61] Monitor jobs now cancelled if exdception detected --- gaps/pipeline.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index 120350e2..9717a1d2 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -103,7 +103,11 @@ def _main(self): break time.sleep(1) - 
self._monitor(step) + try: + self._monitor(step) + except Exception as error: + self._cancel_all_jobs() + raise error else: logger.info("Pipeline job %r is complete.", self.name) From 984d61ab9391f42ceaad776fb7ffa59607991196 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 16:00:01 -0600 Subject: [PATCH 43/61] Cancelling jobs is now hardware-aware --- gaps/pipeline.py | 9 +++++---- gaps/status.py | 5 +++++ tests/test_pipeline.py | 18 ++++++++++++++---- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index 9717a1d2..effd1c2b 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -7,7 +7,6 @@ from pathlib import Path from warnings import warn -from gaps.hpc import SLURM from gaps.status import Status, StatusOption, StatusField, HardwareOption from gaps.utilities import recursively_update_dict from gaps.config import load_config, init_logging_from_config @@ -80,9 +79,11 @@ def name(self): def _cancel_all_jobs(self): """Cancel all jobs in this pipeline.""" - slurm_manager = SLURM() - for job_id in self.status.job_ids: - slurm_manager.cancel(job_id) + status = self.status + for job_id, hardware in zip(status.job_ids, status.job_hardware): + manager = HardwareOption(hardware).manager + if manager is not None: + manager.cancel(job_id) logger.info("Pipeline job %r cancelled.", self.name) def _main(self): diff --git a/gaps/status.py b/gaps/status.py index db8350cd..499627e4 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -201,6 +201,11 @@ def job_ids(self): """list: Flat list of job ids.""" return _get_attr_flat_list(self.data, key=StatusField.JOB_ID) + @property + def job_hardware(self): + """list: Flat list of job hardware options.""" + return _get_attr_flat_list(self.data, key=StatusField.HARDWARE) + def as_df(self, commands=None, index_name="job_name", include_cols=None): """Format status as pandas DataFrame. 
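The new ``job_hardware`` property and the updated ``_cancel_all_jobs`` above pair every job ID with the hardware option it was submitted under, so each job is cancelled through the manager for its own scheduler (local jobs have no manager and are skipped). A rough sketch of the same pattern used outside the ``Pipeline`` class, assuming a project directory that contains a gaps status file (the path is illustrative):

    from gaps.status import Status, HardwareOption

    status = Status("./my_project_dir")  # illustrative project directory

    # cancel each job through the manager that matches its hardware;
    # ``manager`` is ``None`` for local jobs, which are skipped here
    for job_id, hardware in zip(status.job_ids, status.job_hardware):
        manager = HardwareOption(hardware).manager
        if manager is not None:
            manager.cancel(job_id)
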
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 02ab0bdc..ba7c94e6 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -18,6 +18,7 @@ Status, StatusField, StatusOption, + HardwareOption, HardwareStatusRetriever, ) from gaps.pipeline import ( @@ -521,22 +522,31 @@ def cache_cancel_calls(__, job_id): sample_pipeline_config.parent, "run", "test1", - job_attrs={StatusField.JOB_ID: 0}, + job_attrs={ + StatusField.JOB_ID: 0, + StatusField.HARDWARE: HardwareOption.LOCAL, + }, ) Status.mark_job_as_submitted( sample_pipeline_config.parent, "run", "test2", - job_attrs={StatusField.JOB_ID: 1}, + job_attrs={ + StatusField.JOB_ID: 1, + StatusField.HARDWARE: HardwareOption.EAGLE, + }, ) Status.mark_job_as_submitted( sample_pipeline_config.parent, "run", "test3", - job_attrs={StatusField.JOB_ID: 12}, + job_attrs={ + StatusField.JOB_ID: 12, + StatusField.HARDWARE: HardwareOption.EAGLE, + }, ) Pipeline.cancel_all(sample_pipeline_config) - assert set(cancelled_jobs) == {0, 1, 12} + assert set(cancelled_jobs) == {1, 12} assert_message_was_logged("Pipeline job", "INFO") assert_message_was_logged("cancelled", "INFO") From b27c3e505cc76f5606f2fd4a38131f6dc2685e3d Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 17:03:17 -0600 Subject: [PATCH 44/61] Update logger initialization --- gaps/batch.py | 11 +-------- gaps/cli/batch.py | 3 +++ gaps/cli/pipeline.py | 4 +++- gaps/config.py | 20 +++++++++++----- gaps/pipeline.py | 4 +--- tests/cli/test_cli_pipeline.py | 43 +++++++++++++++++----------------- 6 files changed, 44 insertions(+), 41 deletions(-) diff --git a/gaps/batch.py b/gaps/batch.py index a41a2c9c..7b9f67e7 100644 --- a/gaps/batch.py +++ b/gaps/batch.py @@ -17,12 +17,7 @@ from rex.utilities import parse_year -from gaps.config import ( - load_config, - init_logging_from_config, - ConfigType, - resolve_all_paths, -) +from gaps.config import load_config, ConfigType, resolve_all_paths import gaps.cli.pipeline from gaps.pipeline import Pipeline from gaps.exceptions import gapsValueError, gapsConfigError @@ -56,8 +51,6 @@ def __init__(self, config): self._job_tags = None self._base_dir, config = _load_batch_config(config) self._pipeline_fp = Path(config["pipeline_config"]) - - init_logging_from_config(config) self._sets = _parse_config(config) logger.info("Batch job initialized with %d sub jobs.", len(self._sets)) @@ -97,14 +90,12 @@ def _make_job_dirs(self): # walk through current directory getting everything to copy for source_dir, _, filenames in os.walk(self._base_dir): - # don't make additional copies of job sub directories. if any(job_tag in source_dir for job_tag in self._sets): continue # For each dir level, iterate through the batch arg combos for tag, (arg_comb, mod_files, __) in self._sets.items(): - # Add the job tag to the directory path. 
# This will copy config subdirs into the job subdirs source_dir = Path(source_dir) diff --git a/gaps/cli/batch.py b/gaps/cli/batch.py index b287dbaf..beb2d7ff 100644 --- a/gaps/cli/batch.py +++ b/gaps/cli/batch.py @@ -5,12 +5,15 @@ import click import gaps.batch +from gaps.config import init_logging_from_config_file from gaps.cli.command import _WrappedCommand from gaps.cli.documentation import _batch_command_help def _batch(config_file, dry, cancel, delete, monitor_background): """Execute an analysis pipeline over a parametric set of inputs""" + init_logging_from_config_file(config_file, background=monitor_background) + if cancel: gaps.batch.BatchJob(config_file).cancel() elif delete: diff --git a/gaps/cli/pipeline.py b/gaps/cli/pipeline.py index 26b4857a..a1c16e2f 100644 --- a/gaps/cli/pipeline.py +++ b/gaps/cli/pipeline.py @@ -12,7 +12,7 @@ import psutil from gaps import Pipeline -from gaps.config import ConfigType +from gaps.config import ConfigType, init_logging_from_config_file from gaps.cli.documentation import _pipeline_command_help from gaps.cli.command import _WrappedCommand from gaps.status import Status, StatusField @@ -62,6 +62,8 @@ def pipeline(ctx, config_file, cancel, monitor, background=False): config_file = config_file[0] + init_logging_from_config_file(config_file, background=background) + if cancel: Pipeline.cancel_all(config_file) return diff --git a/gaps/config.py b/gaps/config.py index f0a5b06a..7fe73598 100644 --- a/gaps/config.py +++ b/gaps/config.py @@ -255,26 +255,34 @@ def resolve_all_paths(container, base_dir): return container -def init_logging_from_config(config): +def init_logging_from_config_file(config_file, background=False): """Init logging, taking care of legacy rex-style kwargs. Parameters ---------- - config : dict - Dictionary which may or may not contain a "logging" key. If key - not found, no further action is taken. If key is found, the - value is expected to be a dictionary of keyword-argument pairs + config_file : path-like + Path to a config file parsable as a dictionary which may or may + not contain a "logging" key. If key not found, no further action + is taken. If key is found, the value is expected to be a + dictionary of keyword-argument pairs to :func:`gaps.log.init_logger`. rex-style keys ("log_level", "log_file", "log_format") are allowed. The values of these inputs are used to initialize a gaps logger. + background : bool, optional + Optional flag to specify background job initialization. If + ``True``, then stream output is disabled. By default, ``False``. 
""" - + config = load_config(config_file) if "logging" not in config: return + kwargs = config["logging"] kwarg_map = {"log_level": "level", "log_file": "file", "log_format": "fmt"} for legacy_kwarg, new_kwarg in kwarg_map.items(): if legacy_kwarg in kwargs: kwargs[new_kwarg] = kwargs.pop(legacy_kwarg) + if background: + kwargs["stream"] = False + init_logger(**kwargs) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index effd1c2b..7d3bae95 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -9,7 +9,7 @@ from gaps.status import Status, StatusOption, StatusField, HardwareOption from gaps.utilities import recursively_update_dict -from gaps.config import load_config, init_logging_from_config +from gaps.config import load_config from gaps.exceptions import ( gapsConfigError, gapsExecutionError, @@ -51,8 +51,6 @@ def __init__(self, pipeline, monitor=True): self._run_list = _check_pipeline(config) self._init_status() - init_logging_from_config(config) - def _init_status(self): """Initialize the status json in the output directory.""" status = self.status diff --git a/tests/cli/test_cli_pipeline.py b/tests/cli/test_cli_pipeline.py index 69b786cb..b6a2d71b 100644 --- a/tests/cli/test_cli_pipeline.py +++ b/tests/cli/test_cli_pipeline.py @@ -35,15 +35,28 @@ @pytest.fixture -def runnable_pipeline(): +def runnable_pipeline(tmp_path): """Add run to pipeline commands for test only.""" try: Pipeline.COMMANDS["run"] = run + pipe_config_fp = tmp_path / "config_pipe.json" + with open(pipe_config_fp, "w") as config_file: + json.dump(SAMPLE_CONFIG, config_file) yield finally: Pipeline.COMMANDS.pop("run") +@pytest.fixture +def pipe_config_fp(tmp_path): + """Add a sample pipeline config to a temp directory.""" + pipe_config_fp = tmp_path / "config_pipe.json" + with open(pipe_config_fp, "w") as config_file: + json.dump(SAMPLE_CONFIG, config_file) + + yield pipe_config_fp + + @click.command() @click.option("--config", "-c", default=".", help="Path to config file") def run(config): @@ -57,7 +70,7 @@ def run(config): # pylint: disable=no-value-for-parameter -def test_can_run_background(monkeypatch, test_ctx, tmp_path): +def test_can_run_background(monkeypatch, test_ctx, pipe_config_fp): """Test the `_can_run_background` method""" monkeypatch.setattr(os, "setsid", lambda: None, raising=False) @@ -70,10 +83,8 @@ def test_can_run_background(monkeypatch, test_ctx, tmp_path): assert not _can_run_background() - pipe_config_fp = tmp_path / "config_pipe.json" - pipe_config_fp.touch() with pytest.raises(gapsExecutionError) as exc_info: - pipeline(tmp_path, cancel=False, monitor=False, background=True) + pipeline(pipe_config_fp, cancel=False, monitor=False, background=True) assert "Cannot run pipeline in background on system" in str(exc_info) @@ -84,6 +95,7 @@ def test_pipeline_command( tmp_path, cli_runner, runnable_pipeline, + pipe_config_fp, assert_message_was_logged, ): """Test the pipeline_command creation.""" @@ -91,10 +103,6 @@ def test_pipeline_command( target_config_fp = tmp_path / "config_run.json" target_config_fp.touch() - pipe_config_fp = tmp_path / "config_pipe.json" - with open(pipe_config_fp, "w") as config_file: - json.dump(SAMPLE_CONFIG, config_file) - pipe = pipeline_command({}) if _can_run_background(): assert "background" in [opt.name for opt in pipe.params] @@ -118,7 +126,7 @@ def test_pipeline_command( "extra_args", [["--background"], ["--monitor", "--background"]] ) def test_pipeline_command_with_background( - extra_args, tmp_path, cli_runner, monkeypatch + extra_args, pipe_config_fp, cli_runner, 
monkeypatch ): """Test the pipeline_command creation with background.""" @@ -133,9 +141,6 @@ def _new_run(config_file): monkeypatch.setattr(os, "setsid", lambda: None, raising=False) monkeypatch.setattr(os, "fork", lambda: None, raising=False) - pipe_config_fp = tmp_path / "config_pipe.json" - pipe_config_fp.touch() - pipe = pipeline_command({}) assert "background" in [opt.name for opt in pipe.params] assert not kickoff_background_cache @@ -147,12 +152,9 @@ def _new_run(config_file): assert pipe_config_fp.as_posix() in kickoff_background_cache[0] -def test_pipeline_command_cancel(tmp_path, cli_runner, monkeypatch): +def test_pipeline_command_cancel(pipe_config_fp, cli_runner, monkeypatch): """Test the pipeline_command with --cancel.""" - pipe_config_fp = tmp_path / "config_pipe.json" - pipe_config_fp.touch() - def _new_cancel(config): assert config == pipe_config_fp.as_posix() @@ -223,7 +225,9 @@ def _test_func(): assert config == expected_config -def test_pipeline_command_with_running_pid(tmp_path, cli_runner, monkeypatch): +def test_pipeline_command_with_running_pid( + pipe_config_fp, cli_runner, monkeypatch +): """Assert pipeline_command does not start processing if existing pid.""" monkeypatch.setattr( @@ -233,9 +237,6 @@ def test_pipeline_command_with_running_pid(tmp_path, cli_runner, monkeypatch): raising=True, ) - pipe_config_fp = tmp_path / "config_pipe.json" - pipe_config_fp.touch() - pipe = pipeline_command({}) with pytest.warns(gapsWarning) as warn_info: cli_runner.invoke(pipe, ["-c", pipe_config_fp.as_posix()], obj={}) From b10ccbc04e9cbb837a24d5486aa89ca544209c7d Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 7 Aug 2023 22:18:03 -0600 Subject: [PATCH 45/61] Tabs now grouped together in docs --- gaps/cli/documentation.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index d0aed8e8..432310fa 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -2,6 +2,7 @@ """ CLI documentation utilities. """ +import sys from copy import deepcopy from functools import lru_cache from inspect import signature, isclass @@ -107,17 +108,17 @@ .. tabs:: - .. tab:: JSON + .. group-tab:: JSON :: {template_json_config} - .. tab:: YAML + .. group-tab:: YAML :: {template_yaml_config} - .. tab:: TOML + .. group-tab:: TOML :: {template_toml_config} @@ -148,17 +149,17 @@ .. tabs:: - .. tab:: JSON + .. group-tab:: JSON :: {template_json_config} - .. tab:: YAML + .. group-tab:: YAML :: {template_yaml_config} - .. tab:: TOML + .. group-tab:: TOML :: {template_toml_config} @@ -182,17 +183,17 @@ .. tabs:: - .. tab:: JSON + .. group-tab:: JSON :: {sample_json_args_dict} - .. tab:: YAML + .. group-tab:: YAML :: {sample_yaml_args_dict} - .. tab:: TOML + .. group-tab:: TOML :: {sample_toml_args_dict} @@ -219,17 +220,17 @@ .. tabs:: - .. tab:: JSON + .. group-tab:: JSON :: {sample_json_files} - .. tab:: YAML + .. group-tab:: YAML :: {sample_yaml_files} - .. tab:: TOML + .. group-tab:: TOML :: {sample_toml_files} @@ -248,17 +249,17 @@ .. tabs:: - .. tab:: JSON + .. group-tab:: JSON :: {template_json_config} - .. tab:: YAML + .. group-tab:: YAML :: {template_yaml_config} - .. tab:: TOML + .. 
group-tab:: TOML :: {template_toml_config} From c5da3958e7005e370bd637fc03e230068f6d44bc Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 8 Aug 2023 16:12:06 -0600 Subject: [PATCH 46/61] Set max width on terminal to look good with docs --- gaps/cli/cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gaps/cli/cli.py b/gaps/cli/cli.py index 0911ef57..fb30fff7 100644 --- a/gaps/cli/cli.py +++ b/gaps/cli/cli.py @@ -216,3 +216,4 @@ def _main_cb(ctx, verbose): """Set the obj and verbose settings of the commands.""" ctx.ensure_object(dict) ctx.obj["VERBOSE"] = verbose + ctx.max_content_width = 92 From da1c6f346ed8d53954b07bdcf400a187af26a2cf Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 8 Aug 2023 16:12:31 -0600 Subject: [PATCH 47/61] Add `_is_sphinx_build` utility function --- gaps/utilities.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gaps/utilities.py b/gaps/utilities.py index b401596b..9e83f5ae 100644 --- a/gaps/utilities.py +++ b/gaps/utilities.py @@ -2,6 +2,7 @@ """ GAPs utilities. """ +import sys import copy import logging import collections @@ -165,3 +166,8 @@ def resolve_path(path, base_dir): pass return path + + +def _is_sphinx_build(): + """``True`` if sphinx is in system modules, else ``False``""" + return "sphinx" in sys.modules From 10876925bd50c4d739c01066a8de4d3eedc96e15 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Tue, 8 Aug 2023 18:24:52 -0600 Subject: [PATCH 48/61] CLI command now says "Config Parameters" --- gaps/cli/command.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/gaps/cli/command.py b/gaps/cli/command.py index 8cdb95f1..b9e56c29 100644 --- a/gaps/cli/command.py +++ b/gaps/cli/command.py @@ -12,6 +12,7 @@ from gaps.cli.config import GAPS_SUPPLIED_ARGS from gaps.cli.documentation import CommandDocumentation from gaps.cli.preprocessing import split_project_points_into_ranges +from gaps.utilities import _is_sphinx_build class AbstractBaseCLICommandConfiguration(ABC): @@ -711,6 +712,12 @@ def wrap_text( ) if "Parameters\n----------" not in wrapped_text.replace(" ", ""): wrapped_text = wrapped_text.replace(".\n", ".\n\n") + elif not _is_sphinx_build(): # pragma: no cover + wrapped_text = wrapped_text.replace( + "Parameters\n----------", + "\nConfig Parameters\n-----------------", + ) + return wrapped_text click.formatting.wrap_text = wrap_text From 8105c8285c67fdd4f952c470732c2602500f85d9 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 10:42:33 -0600 Subject: [PATCH 49/61] Big update to documentation formatting to support pretty docs in both CLI and SPhinx --- gaps/cli/documentation.py | 402 +++++++++++++++------------- tests/cli/test_cli_documentation.py | 30 ++- 2 files changed, 248 insertions(+), 184 deletions(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 432310fa..0cf6974b 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -2,14 +2,15 @@ """ CLI documentation utilities. """ -import sys from copy import deepcopy +from itertools import chain from functools import lru_cache from inspect import signature, isclass from numpydoc.docscrape import NumpyDocString from gaps.config import config_as_str_for_docstring, ConfigType +from gaps.utilities import _is_sphinx_build DEFAULT_EXEC_VALUES = { @@ -27,21 +28,19 @@ } EXTRA_EXEC_PARAMS = { - "max_workers": "Maximum number of parallel workers run on each node.", - "sites_per_worker": "Number of sites to run in series on a worker.", - "memory_utilization_limit": ( - "Memory utilization limit (fractional). 
Must be a value between 0 " - "and 100. This input sets how much data will be stored in-memory " - "at any given time before flushing to disk." - ), - "timeout": ( - "Number of seconds to wait for parallel run iteration to complete " - "before early termination" - ), - "pool_size": ( - "Number of futures to submit to a single process pool for " - "parallel futures." - ), + "max_workers": """Maximum number of parallel workers run on each node.""", + "sites_per_worker": """Number of sites to run in series on a worker.""", + "memory_utilization_limit": """Memory utilization limit (fractional). Must be a value + between 0 and 100. This input sets how much data will + be stored in-memory at any given time before flushing + to disk. + """, + "timeout": """Number of seconds to wait for parallel run iteration + to complete before early termination. + """, + "pool_size": """Number of futures to submit to a single process pool + for parallel futures. + """, } CONFIG_TYPES = [ @@ -102,86 +101,71 @@ """ PIPELINE_CONFIG_DOC = """ -Path to the ``pipeline`` configuration file. This argument can be left out, -but *one and only one file* with "pipeline" in the name should exist in the -directory and contain the config information. Below is a sample template config - -.. tabs:: - - .. group-tab:: JSON - :: - - {template_json_config} - - .. group-tab:: YAML - :: - - {template_yaml_config} - - .. group-tab:: TOML - :: - - {template_toml_config} - +Path to the ``pipeline`` configuration file. This argument can be +left out, but *one and only one file* with "pipeline" in the +name should exist in the directory and contain the config +information. {sample_config} Parameters ---------- pipeline : list of dicts - A list of dictionaries, where each dictionary represents one step - in the pipeline. Each dictionary should have one key-value pair, - where the key is the name of the CLI command to run, and the value - is the path to a config file containing the configuration for that - command. + A list of dictionaries, where each dictionary represents one + step in the pipeline. Each dictionary should have one + key-value pair, where the key is the name of the CLI + command to run, and the value is the path to a config + file containing the configuration for that command. logging : dict, optional Dictionary containing keyword-argument pairs to pass to - `init_logger `_. This initializes - logging for the submission portion of the pipeline. Note, however, - that each step (command) will **also** record the submission step - log output to a common "project" log file, so it's only ever - necessary to use this input if you want a different (lower) level of - verbosity than the `log_level` specified in the config for the step - of the pipeline being executed. + `init_logger `_. This + initializes logging for the submission portion of the + pipeline. Note, however, that each step (command) will + **also** record the submission step log output to a + common "project" log file, so it's only ever necessary + to use this input if you want a different (lower) level + of verbosity than the `log_level` specified in the + config for the step of the pipeline being executed. """ BATCH_CONFIG_DOC = """ -Path to the ``batch`` configuration file. Below is a sample template config - -.. tabs:: - - .. group-tab:: JSON - :: - - {template_json_config} - - .. group-tab:: YAML - :: - - {template_yaml_config} - - .. group-tab:: TOML - :: - - {template_toml_config} - +Path to the ``batch`` configuration file. 
{sample_config} Parameters ---------- pipeline_config : str - Path to the pipeline configuration defining the commands to run for - every parametric set. + Path to the pipeline configuration defining the commands to + run for every parametric set. sets : list of dicts - A list of dictionaries, where each dictionary defines a "set" of - parametric runs. Each dictionary should have the following keys: + A list of dictionaries, where each dictionary defines a + "set" of parametric runs. Each dictionary should have + the following keys: args : dict A dictionary defining the arguments across all input - configuration files to parameterize. Each argument to be - parametrized should be a key in this dictionary, and the - value should be a list of the parameter values to run for - this argument. + configuration files to parameterize. Each argument + to be parametrized should be a key in this + dictionary, and the value should be a list of the + parameter values to run for this argument. - .. tabs:: + {batch_args_dict} + + Remember that the keys in the ``args`` dictionary + should be part of (at least) one of your other + configuration files. + files : list + A list of paths to the configuration files that + contain the arguments to be updated for every + parametric run. Arguments can be spread out over + multiple files. {batch_files} + set_tag : str, optional + Optional string defining a set tag that will prefix + each job tag for this set. This tag does not need to + include an underscore, as that is provided during + concatenation. + + +""" +_BATCH_ARGS_DICT = """.. tabs:: .. group-tab:: JSON :: @@ -199,8 +183,8 @@ {sample_toml_args_dict} - This example would run a total of six pipelines, one with - each of the following arg combinations: + This example would run a total of six pipelines, one + with each of the following arg combinations: :: input_constant_1=18.20, path_to_a_file="/first/path.h5" @@ -211,12 +195,8 @@ input_constant_1=19.04, path_to_a_file="/third/path.h5" - Remember that the keys in the ``args`` dictionary should be - part of (at least) one of your other configuration files. - files : list - A list of paths to the configuration files that contain the - arguments to be updated for every parametric run. Arguments - can be spread out over multiple files. For example: +""" +_BATCH_FILES = """For example: .. tabs:: @@ -236,16 +216,16 @@ {sample_toml_files} - set_tag : str, optional - Optional string defining a set tag that will prefix each - job tag for this set. This tag does not need to include an - underscore, as that is provided during concatenation. - - """ CONFIG_DOC = """ -Path to the ``{name}`` configuration file. Below is a sample template config +Path to the ``{name}`` configuration file. {sample_config} + +{docstring} + +Note that you may remove any keys with a ``null`` value if you do not intend to update them yourself. +""" +SAMPLE_CONFIG_DOC = """Below is a sample template config .. tabs:: @@ -264,10 +244,6 @@ {template_toml_config} - -{docstring} - -Note that you may remove any keys with a ``null`` value if you do not intend to update them yourself. """ COMMAND_DOC = """ Execute the ``{name}`` step from a config file. @@ -286,29 +262,38 @@ arguments are: :option: ({{'local', 'kestrel', 'eagle', 'peregrine'}}) - Hardware run option. Determines the type of job scheduler - tp use as well as the base AU cost. - :allocation: (str) HPC project (allocation) handle. - :walltime: (int) Node walltime request in hours. - :qos: (str, optional) Quality-of-service specifier. 
On Eagle or - Kestrel, this should be one of {{'standby', 'normal', 'high'}}. - Note that 'high' priority doubles the AU cost. By default, - ``"normal"``. - :memory: (int, optional) Node memory request in GB. By default, - ``None``, which does not specify a memory limit.{n}{eep} - :queue: (str, optional; PBS ONLY) HPC queue to submit job to. - Examples include: 'debug', 'short', 'batch', 'batch-h', - 'long', etc. By default, ``None``, which uses "test_queue". - :feature: (str, optional) Additional flags for SLURM job - (e.g. "-p debug"). By default, ``None``, which does not - specify any additional flags. - :conda_env: (str, optional) Name of conda environment to activate. - By default, ``None``, which does not load any environments. - :module: (str, optional) Module to load. By default, ``None``, which - does not load any modules. - :sh_script: (str, optional) Extra shell script to run before - command call. By default, ``None``, which does not run any - scripts. + Hardware run option. Determines the type of job + scheduler to use as well as the base AU cost. + :allocation: (str) + HPC project (allocation) handle. + :walltime: (int) + Node walltime request in hours. + :qos: (str, optional) + Quality-of-service specifier. On Eagle or + Kestrel, this should be one of {{'standby', 'normal', + 'high'}}. Note that 'high' priority doubles the AU + cost. By default, ``"normal"``. + :memory: (int, optional) + Node memory request in GB. By default, ``None``, which + does not specify a memory limit.{n}{eep} + :queue: (str, optional; PBS ONLY) + HPC queue to submit job to. Examples include: 'debug', + 'short', 'batch', 'batch-h', 'long', etc. + By default, ``None``, which uses "test_queue". + :feature: (str, optional) + Additional flags for SLURM job (e.g. "-p debug"). + By default, ``None``, which does not specify any + additional flags. + :conda_env: (str, optional) + Name of conda environment to activate. By default, + ``None``, which does not load any environments. + :module: (str, optional) + Module to load. By default, ``None``, which does not + load any modules. + :sh_script: (str, optional) + Extra shell script to run before command call. + By default, ``None``, which does not run any + scripts. Only the `option` key is required for local execution. For execution on the HPC, the `allocation` and `walltime` keys are also @@ -326,12 +311,13 @@ """ NODES_DOC = ( - "\n :nodes: (int, optional) Number of nodes to split the project " - "\n points across. Note that the total number of requested " - "\n nodes for a job may be larger than this value if the " - "\n command splits across other inputs. Default is ``1``." + "\n :nodes: (int, optional)" + "\n Number of nodes to split the project points across. " + "\n Note that the total number of requested nodes for " + "\n a job may be larger than this value if the command" + "\n splits across other inputs. Default is ``1``." ) -EXTRA_EXEC_PARAM_DOC = "\n :{name}: ({type}) {desc}" +EXTRA_EXEC_PARAM_DOC = "\n :{name}: ({type})\n {desc}" class CommandDocumentation: @@ -484,7 +470,7 @@ def parameter_help(self): for p in doc["Parameters"] if p.name in self.template_config ] - return str(param_only_doc) + return "\n".join(_format_lines(str(param_only_doc).split("\n"))) @property def extended_summary(self): @@ -507,19 +493,27 @@ def config_help(self, command_name): str Help string for the config file. 
""" - return CONFIG_DOC.format( + if _is_sphinx_build(): + sample_config = SAMPLE_CONFIG_DOC.format( + template_json_config=config_as_str_for_docstring( + self.template_config, config_type=ConfigType.JSON + ), + template_yaml_config=config_as_str_for_docstring( + self.template_config, config_type=ConfigType.YAML + ), + template_toml_config=config_as_str_for_docstring( + self.template_config, config_type=ConfigType.TOML + ), + ) + else: + sample_config = "" + + doc = CONFIG_DOC.format( name=command_name, - template_json_config=config_as_str_for_docstring( - self.template_config, config_type=ConfigType.JSON - ), - template_yaml_config=config_as_str_for_docstring( - self.template_config, config_type=ConfigType.YAML - ), - template_toml_config=config_as_str_for_docstring( - self.template_config, config_type=ConfigType.TOML - ), + sample_config=sample_config, docstring=self.parameter_help, ) + return _cli_formatted(doc) def command_help(self, command_name): """Generate a help string for a command. @@ -535,9 +529,8 @@ def command_help(self, command_name): str Help string for the command. """ - return COMMAND_DOC.format( - name=command_name, desc=self.extended_summary - ) + doc = COMMAND_DOC.format(name=command_name, desc=self.extended_summary) + return _cli_formatted(doc) @lru_cache(maxsize=16) def _param_value(self, param): @@ -582,23 +575,32 @@ def _main_command_help(prog_name, commands): return MAIN_DOC.format(name=prog_name, cli_help_str=cli_help_str) -def _pipeline_command_help(pipeline_config): +def _pipeline_command_help(pipeline_config): # pragma: no cover """Generate pipeline command help from a sample config.""" - format_inputs = {} + if not _is_sphinx_build(): + return _cli_formatted(PIPELINE_CONFIG_DOC.format(sample_config="")) + template_names = [ "template_json_config", "template_yaml_config", "template_toml_config", ] - for name, c_type in zip(template_names, CONFIG_TYPES): - format_inputs[name] = config_as_str_for_docstring( - pipeline_config, config_type=c_type - ) - return PIPELINE_CONFIG_DOC.format(**format_inputs) + sample_config = SAMPLE_CONFIG_DOC.format( + **_format_dict(pipeline_config, template_names) + ) + doc = PIPELINE_CONFIG_DOC.format(sample_config=sample_config) + return _cli_formatted(doc) -def _batch_command_help(): +def _batch_command_help(): # pragma: no cover """Generate batch command help from a sample config.""" + if not _is_sphinx_build(): + doc = BATCH_CONFIG_DOC.format( + sample_config="", batch_args_dict="", batch_files="" + ) + return _cli_formatted(doc) + + format_inputs = {} template_config = { "pipeline_config": CommandDocumentation.REQUIRED_TAG, "sets": [ @@ -614,6 +616,16 @@ def _batch_command_help(): }, ], } + template_names = [ + "template_json_config", + "template_yaml_config", + "template_toml_config", + ] + + format_inputs["sample_config"] = SAMPLE_CONFIG_DOC.format( + **_format_dict(template_config, template_names) + ) + sample_args_dict = { "args": { "input_constant_1": [18.02, 19.04], @@ -624,56 +636,88 @@ def _batch_command_help(): ], } } - sample_files = {"files": ["./config_run.yaml", "./config_analyze.json"]} - format_inputs = {} - template_names = [ - "template_json_config", - "template_yaml_config", - "template_toml_config", - ] - for name, c_type in zip(template_names, CONFIG_TYPES): - format_inputs[name] = config_as_str_for_docstring( - template_config, config_type=c_type - ) - sample_args_names = [ "sample_json_args_dict", "sample_yaml_args_dict", "sample_toml_args_dict", ] - for name, c_type in zip(sample_args_names, 
CONFIG_TYPES): - format_inputs[name] = config_as_str_for_docstring( - sample_args_dict, - config_type=c_type, - num_spaces=20 if "json" in name else 24, - ) - if "json" in name: - format_inputs[name] = "\n".join( - format_inputs[name].split("\n")[1:-1] - ).lstrip() + format_inputs["batch_args_dict"] = _BATCH_ARGS_DICT.format( + **_format_dict(sample_args_dict, sample_args_names, batch_docs=True) + ) + sample_files = {"files": ["./config_run.yaml", "./config_analyze.json"]} sample_files_names = [ "sample_json_files", "sample_yaml_files", "sample_toml_files", ] - for name, c_type in zip(sample_files_names, CONFIG_TYPES): - format_inputs[name] = config_as_str_for_docstring( - sample_files, + format_inputs["batch_files"] = _BATCH_FILES.format( + **_format_dict(sample_files, sample_files_names, batch_docs=True) + ) + + doc = BATCH_CONFIG_DOC.format(**format_inputs) + return _cli_formatted(doc) + + +def _format_dict(sample, names, batch_docs=False): # pragma: no cover + """Format a sample into a documentation config""" + configs = {} + for name, c_type in zip(names, CONFIG_TYPES): + configs[name] = config_as_str_for_docstring( + sample, config_type=c_type, - num_spaces=20 if "json" in name else 24, + num_spaces=(20 if "json" in name else 24) if batch_docs else 12, ) - if "json" in name: - format_inputs[name] = "\n".join( - format_inputs[name].split("\n")[1:-1] - ).lstrip() - - return BATCH_CONFIG_DOC.format(**format_inputs) + if batch_docs and "json" in name: + configs[name] = "\n".join(configs[name].split("\n")[1:-1]).lstrip() + return configs def _as_functions(functions): - """Yield items from input, converting all classes to their init funcs""" + """Yield items from input, converting all classes to their init methods""" for func in functions: if isclass(func): func = func.__init__ yield func + + +def _line_needs_newline(line): + """Determine wether a newline should be added to the current line""" + if any( + f":{key}:" in line + for key in chain(DEFAULT_EXEC_VALUES, EXTRA_EXEC_PARAMS) + ): + return True + if not line.startswith(" "): + return True + if len(line) <= 4: + return True + if line[4] == " ": + return True + return False + + +def _format_lines(lines): + """Format docs into longer lines of text for easier wrapping in CLI""" + new_lines = [lines[0]] + current_line = [] + for line in lines[1:]: + if _line_needs_newline(line): + current_line = " ".join(current_line) + if current_line: + line = "\n".join([current_line, line]) + new_lines.append(line) + current_line = [] + else: + if current_line: + line = line.lstrip() + current_line.append(line) + + return new_lines + + +def _cli_formatted(doc): + """Apply minor formatting changes for strings when displaying to CLI""" + if not _is_sphinx_build(): + doc = doc.replace("``", "`").replace("{{", "{").replace("}}", "}") + return doc diff --git a/tests/cli/test_cli_documentation.py b/tests/cli/test_cli_documentation.py index 0cb6e03f..668a5b66 100644 --- a/tests/cli/test_cli_documentation.py +++ b/tests/cli/test_cli_documentation.py @@ -9,6 +9,7 @@ import pytest +import gaps.cli.documentation from gaps.cli.documentation import ( DEFAULT_EXEC_VALUES, EXTRA_EXEC_PARAMS, @@ -36,7 +37,13 @@ def test_command_documentation_copies_skip_params(): def test_command_documentation_extra_exec_params(): """Test the `CommandDocumentation` with extra exec params.""" - def func(max_workers, sites_per_worker, memory_utilization_limit, timeout, pool_size): + def func( + max_workers, + sites_per_worker, + memory_utilization_limit, + timeout, + pool_size, + ): 
"""A short description. Parameters @@ -86,7 +93,13 @@ def func(max_workers, sites_per_worker, memory_utilization_limit, timeout, pool_ def test_command_documentation_extra_exec_params_no_user_doc(): """Test the `CommandDocumentation` with extra exec params no user doc.""" - def func(max_workers, sites_per_worker, memory_utilization_limit, timeout, pool_size): + def func( + max_workers, + sites_per_worker, + memory_utilization_limit, + timeout, + pool_size, + ): """A short description.""" expected_parameters = [ @@ -370,16 +383,23 @@ def func(): assert doc.extended_summary == expected_str -def test_command_documentation_config_help(): +def test_command_documentation_config_help(monkeypatch): """Test `CommandDocumentation.config_help`.""" + monkeypatch.setattr( + gaps.cli.documentation, "_is_sphinx_build", lambda: True, raising=True + ) + doc = CommandDocumentation(func_no_args, is_split_spatially=True) config_help = doc.config_help(command_name="my_command_name") assert "my_command_name" in config_help - assert doc.parameter_help in config_help + assert ( + gaps.cli.documentation._cli_formatted(doc.parameter_help) + in config_help + ) assert ".. tabs::" in config_help - assert ".. tab::" in config_help + assert ".. group-tab::" in config_help def test_command_documentation_command_help(): From ddee6aeea854d00e5ef803e76f00f5e4457d666f Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 10:46:26 -0600 Subject: [PATCH 50/61] Minor formatting --- gaps/cli/documentation.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 0cf6974b..e3d06ee1 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -30,17 +30,17 @@ EXTRA_EXEC_PARAMS = { "max_workers": """Maximum number of parallel workers run on each node.""", "sites_per_worker": """Number of sites to run in series on a worker.""", - "memory_utilization_limit": """Memory utilization limit (fractional). Must be a value - between 0 and 100. This input sets how much data will - be stored in-memory at any given time before flushing - to disk. - """, + "memory_utilization_limit": """Memory utilization limit (fractional). + Must be a value between 0 and 100. This input sets + how much data will be stored in-memory at any + given time before flushing to disk. + """, "timeout": """Number of seconds to wait for parallel run iteration to complete before early termination. - """, + """, "pool_size": """Number of futures to submit to a single process pool for parallel futures. 
- """, + """, } CONFIG_TYPES = [ From bb6014a9b45096e9001ebf39c2ca872da8c4b33c Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 11:02:45 -0600 Subject: [PATCH 51/61] Add zfill fix --- gaps/cli/config.py | 2 +- tests/cli/test_cli_config.py | 50 ++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/gaps/cli/config.py b/gaps/cli/config.py index d3b7f33c..85b35c2f 100644 --- a/gaps/cli/config.py +++ b/gaps/cli/config.py @@ -228,7 +228,7 @@ def kickoff_jobs(self): jobs = sorted(product(*lists_to_run)) num_jobs_submit = len(jobs) self._warn_about_excessive_au_usage(num_jobs_submit) - n_zfill = len(str(num_jobs_submit)) + n_zfill = len(str(max(0, num_jobs_submit - 1))) extra_exec_args = {} for param in EXTRA_EXEC_PARAMS: if param in self.exec_kwargs: diff --git a/tests/cli/test_cli_config.py b/tests/cli/test_cli_config.py index a39ff673..82d29072 100644 --- a/tests/cli/test_cli_config.py +++ b/tests/cli/test_cli_config.py @@ -379,6 +379,56 @@ def test_run_multiple_nodes( ) +@pytest.mark.parametrize("test_class", [False, True]) +def test_run_multiple_nodes_correct_zfill( + test_ctx, runnable_script, monkeypatch, test_class, job_names_cache +): + """Test the `run` function calls `_kickoff_hpc_job` for multiple nodes.""" + + tmp_path = test_ctx.obj["TMP_PATH"] + + if test_class: + command_config = CLICommandFromClass( + TestCommand, + "run", + name="run", + split_keys={"project_points", "_z_0"}, + ) + else: + command_config = CLICommandFromFunction( + _testing_function, + name="run", + split_keys={"project_points", "_z_0"}, + ) + + config = { + "execution_control": { + "option": "eagle", + "allocation": "test", + "walltime": 1, + "nodes": 5, + "max_workers": 1, + }, + "input1": 1, + "input2": 7, + "input3": 8, + "_z_0": ["unsorted", "strings"], + "project_points": [0, 1, 2, 4, 5, 6, 7, 8, 9], + } + + config_fp = tmp_path / "config.json" + with open(config_fp, "w") as config_file: + json.dump(config, config_file) + + assert len(job_names_cache) == 0 + from_config(config_fp, command_config) + assert len(job_names_cache) == 10 + assert len(set(job_names_cache)) == 10 + + assert not any("j00" in job_name for job_name in job_names_cache) + assert any("j0" in job_name for job_name in job_names_cache) + + @pytest.mark.parametrize("test_class", [False, True]) def test_run_no_split_keys( test_ctx, runnable_script, monkeypatch, test_class, job_names_cache From 31ecfb81a6535f5ae78540c3b8cfd74838bcc145 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 12:17:02 -0600 Subject: [PATCH 52/61] Add support for JSON5 --- gaps/cli/documentation.py | 8 ++++---- gaps/config.py | 43 +++++++++++++++++++++++++++++++++++++++ requirements.txt | 3 +++ 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index e3d06ee1..df1a6a52 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -59,7 +59,7 @@ $ {name} template-configs By default, this generates template JSON configuration files, though you -can request YAML or TOML configuration files instead. You can run +can request JSON5, YAML, or TOML configuration files instead. You can run ``$ {name} template-configs --help`` on the command line to see all available options for the ``template-configs`` command. Once the template configuration files have been generated, you can fill them out by referring to the @@ -167,7 +167,7 @@ """ _BATCH_ARGS_DICT = """.. tabs:: - .. group-tab:: JSON + .. 
group-tab:: JSON/JSON5 :: {sample_json_args_dict} @@ -200,7 +200,7 @@ .. tabs:: - .. group-tab:: JSON + .. group-tab:: JSON/JSON5 :: {sample_json_files} @@ -229,7 +229,7 @@ .. tabs:: - .. group-tab:: JSON + .. group-tab:: JSON/JSON5 :: {template_json_config} diff --git a/gaps/config.py b/gaps/config.py index 7fe73598..a5a6d397 100644 --- a/gaps/config.py +++ b/gaps/config.py @@ -9,6 +9,7 @@ import json import yaml import toml +import pyjson5 from gaps.log import init_logger from gaps.utilities import CaseInsensitiveEnum, resolve_path @@ -17,6 +18,18 @@ _CONFIG_HANDLER_REGISTRY = {} +# pylint: disable=too-few-public-methods +class _JSON5Formatter: + """Format input JSON5 data with indentation.""" + + def __init__(self, data): + self.data = data + + def _format_as_json(self): + """Format the data input with as string with indentation.""" + return json.dumps(self.data, indent=4) + + class Handler(ABC): """ABC for configuration file handler.""" @@ -84,6 +97,36 @@ def loads(cls, config_str): return json.loads(config_str) +# pylint: disable=no-member +class JSON5Handler(Handler): + """JSON5 config file handler.""" + + FILE_EXTENSION = "json5" + + @classmethod + def dump(cls, config, stream): + """Write the config to a stream (JSON5 file).""" + return pyjson5.encode_io( + _JSON5Formatter(config), + stream, + supply_bytes=False, + tojson="_format_as_json", + ) + + @classmethod + def dumps(cls, config): + """Convert the config to a JSON5 string.""" + return pyjson5.encode( + _JSON5Formatter(config), + tojson="_format_as_json", + ) + + @classmethod + def loads(cls, config_str): + """Parse the JSON5 string into a config dictionary.""" + return pyjson5.decode(config_str, maxdepth=-1) + + class YAMLHandler(Handler): """YAML config file handler.""" diff --git a/requirements.txt b/requirements.txt index 4aa083b2..325f0e5f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,9 +5,12 @@ numpy>=1.24.2 numpydoc>=1.5.0 pandas>=2.0.0 psutil>=5.9.2 +pyjson5>=1.6.4 +pyyaml>=6.0 setuptools>=65.4.0 sphinx>=5.3.0 sphinx-copybutton>=0.5.1 sphinx-click>=4.4.0 sphinx-tabs>=3.4.1 tabulate>=0.9.0 +toml>=0.10.2 From d19d24d9c7ff3e19a260ca6ce786c3b5bd83e542 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 12:35:04 -0600 Subject: [PATCH 53/61] Add GHA to run reV CLI tests --- .github/workflows/pr_rev_cli_tests.yml | 40 ++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 .github/workflows/pr_rev_cli_tests.yml diff --git a/.github/workflows/pr_rev_cli_tests.yml b/.github/workflows/pr_rev_cli_tests.yml new file mode 100644 index 00000000..ee0cbc9d --- /dev/null +++ b/.github/workflows/pr_rev_cli_tests.yml @@ -0,0 +1,40 @@ +name: reV CLI Tests + +on: pull_request + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: checkout gaps + uses: actions/checkout@v2 + with: + ref: ${{ github.event.pull_request.head.ref }} + fetch-depth: 1 + path: gaps + - name: checkout reV + uses: actions/checkout@v2 + with: + repository: nrel/reV + fetch-depth: 1 + path: reV + - name: Set up Python + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: 3.9 + - name: Install reV + working-directory: ./reV + shell: bash -l {0} + run: | + pip install -e . + - name: Install gaps + working-directory: ./gaps + shell: bash -l {0} + run: | + pip install -e . 
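+      # gaps is installed last (editable) so this checkout overrides the gaps
+      # release pulled in by reV's own requirements; the reV CLI tests in the
+      # next step therefore run against the PR's gaps code.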
+ - name: Run reV CLI tests + working-directory: ./reV + shell: bash -l {0} + run: | + pytest -k cli -v --disable-warnings From da5fc8dc3212791460d0d3332d41c0ef6154535d Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 14:27:30 -0600 Subject: [PATCH 54/61] Expanded collection tests slightly --- tests/cli/test_cli.py | 47 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index f06b4a03..2055dbd1 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -40,6 +40,23 @@ def runnable_script(): _CMD_LIST.pop(0) +def _copy_single_file( + project_points, source_dir, dest_dir, file_pattern, max_workers +): + """Test function that copies over data files.""" + time.sleep(3) + assert project_points.gids == PROJECT_POINTS + assert max_workers == MAX_WORKERS + out_files = [] + for in_file in sorted(Path(source_dir).glob(file_pattern)): + out_file_name = file_pattern.replace("*", "") + out_file = str(Path(dest_dir) / out_file_name) + shutil.copy(in_file, out_file) + out_files.append(out_file) + break + return out_files + + def _copy_files( project_points, source_dir, dest_dir, file_pattern, max_workers ): @@ -100,17 +117,20 @@ def test_make_cli(): @pytest.mark.integration +@pytest.mark.parametrize("test_single_file", [True, False]) def test_cli( tmp_cwd, cli_runner, collect_pattern, manual_collect, runnable_script, + test_single_file, assert_message_was_logged, ): """Integration test of `make_cli`""" data_dir, file_pattern = collect_pattern + run_func = _copy_single_file if test_single_file else _copy_files def preprocess_run_config(config, project_dir, out_dir): assert project_dir == out_dir @@ -121,7 +141,7 @@ def preprocess_run_config(config, project_dir, out_dir): commands = [ CLICommandFromFunction( - _copy_files, + run_func, name="run", add_collect=True, split_keys=["project_points"], @@ -163,7 +183,7 @@ def preprocess_run_config(config, project_dir, out_dir): main, ["pipeline", "-c", pipe_config_fp.as_posix()] ) assert len(set((tmp_cwd / "logs").glob("*run*"))) == 1 - assert len(set(tmp_cwd.glob(file_pattern))) == 4 + assert len(set(tmp_cwd.glob(file_pattern))) == 1 if test_single_file else 4 assert tmp_cwd / "logs" in set(tmp_cwd.glob("*")) result = cli_runner.invoke(main, ["status"]) @@ -183,6 +203,12 @@ def preprocess_run_config(config, project_dir, out_dir): string in lines[-ind] for string in partial.split() ), err_msg + with open(collect_config_fp, "r") as config_file: + config = json.load(config_file) + config["collect_pattern"] = {"out.h5": "./*.h5"} + with open(collect_config_fp, "w") as config_file: + json.dump(config, config_file) + assert tmp_cwd / "chunk_files" not in set(tmp_cwd.glob("*")) result = cli_runner.invoke( main, @@ -199,14 +225,15 @@ def preprocess_run_config(config, project_dir, out_dir): h5_files = set(tmp_cwd.glob("*.h5")) assert len(h5_files) == 1 - with h5py.File(h5_files.pop(), "r") as collected_outputs: - assert len(collected_outputs.keys()) == 5 - assert "cf_mean" in collected_outputs - assert "lcoe_fcr" in collected_outputs - cf_profiles = collected_outputs["cf_profile"][...] + if not test_single_file: + with h5py.File(h5_files.pop(), "r") as collected_outputs: + assert len(collected_outputs.keys()) == 5 + assert "cf_mean" in collected_outputs + assert "lcoe_fcr" in collected_outputs + cf_profiles = collected_outputs["cf_profile"][...] 
- profiles = manual_collect(data_dir / file_pattern, "cf_profile") - assert np.allclose(profiles, cf_profiles) + profiles = manual_collect(data_dir / file_pattern, "cf_profile") + assert np.allclose(profiles, cf_profiles) result = cli_runner.invoke( main, ["pipeline", "-c", pipe_config_fp.as_posix()] @@ -233,7 +260,7 @@ def preprocess_run_config(config, project_dir, out_dir): assert project_dir == out_dir config["dest_dir"] = str(project_dir) config["source_dir"] = str(data_dir) - config["file_pattern"] = file_pattern + config["file_pattern"] = f"./{file_pattern}" return config commands = [ From aba2f9c18e30a1c6c304028cc1891916a71a26eb Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 18:23:23 -0600 Subject: [PATCH 55/61] Updated status output --- gaps/cli/status.py | 304 +++++++++++++++++++++++++---------- gaps/status.py | 2 +- tests/cli/test_cli.py | 178 ++++++++++---------- tests/cli/test_cli_status.py | 54 +++++-- 4 files changed, 346 insertions(+), 192 deletions(-) diff --git a/gaps/cli/status.py b/gaps/cli/status.py index 83f3d896..e7e05834 100644 --- a/gaps/cli/status.py +++ b/gaps/cli/status.py @@ -9,7 +9,7 @@ import psutil import pandas as pd from colorama import init, Fore, Style -from tabulate import tabulate +from tabulate import tabulate, SEPARATING_LINE from gaps.status import ( DT_FMT, @@ -24,6 +24,7 @@ from gaps.cli.command import _WrappedCommand +JOB_STATUS_COL = StatusField.JOB_STATUS.value FAILURE_STRINGS = ["failure", "fail", "failed", "f"] RUNNING_STRINGS = ["running", "run", "r"] SUBMITTED_STRINGS = ["submitted", "submit", "sb", "pending", "pend", "p"] @@ -89,20 +90,79 @@ def _filter_df_for_status(df, status_request): return df.copy() -def _color_print(df, print_folder, commands, status): - """Color the status portion of the print out.""" +def _calculate_runtime_stats(df): + """Calculate df runtime statistics""" + + run_times_seconds = df[StatusField.RUNTIME_SECONDS] + if run_times_seconds.isna().any(): + return pd.DataFrame() + runtime_stats = pd.DataFrame(run_times_seconds.astype(float).describe()) + runtime_stats = runtime_stats.T[["min", "mean", "50%", "max"]] + runtime_stats = runtime_stats.rename(columns={"50%": "median"}) + # runtime_stats = runtime_stats.rename( + # columns={ + # col: f"{Fore.MAGENTA}{col}{Style.RESET_ALL}" + # for col in runtime_stats.columns + # } + # ) + runtime_stats.index = ["Node runtime"] + runtime_stats = runtime_stats.T + runtime_stats["Node runtime"] = runtime_stats["Node runtime"].apply( + _elapsed_time_as_str + ) + return pd.DataFrame(runtime_stats).reset_index() - def color_string(string): - if string == StatusOption.FAILED: - string = f"{Fore.RED}{string}{Style.RESET_ALL}" - elif string == StatusOption.SUCCESSFUL: - string = f"{Fore.GREEN}{string}{Style.RESET_ALL}" - elif string == StatusOption.RUNNING: - string = f"{Fore.BLUE}{string}{Style.RESET_ALL}" - else: - string = f"{Fore.YELLOW}{string}{Style.RESET_ALL}" - return string +def _calculate_walltime(df): + """Calculate total project walltime""" + all_jobs_failed = (df[StatusField.JOB_STATUS] == StatusOption.FAILED).all() + all_end_times_missing = df[StatusField.TIME_END].isna().all() + if all_jobs_failed and all_end_times_missing: + return 0 + + start_time = df[StatusField.TIME_SUBMITTED].fillna( + dt.datetime.now().strftime(DT_FMT) + ) + start_time = pd.to_datetime(start_time, format=DT_FMT).min() + + end_time = df[StatusField.TIME_END].fillna( + dt.datetime.now().strftime(DT_FMT) + ) + end_time = pd.to_datetime(end_time, format=DT_FMT).max() + return (end_time - 
start_time).total_seconds() + + +def _calculate_aus(df): + """Calculate the number of AU's spent by jobs""" + run_times_seconds = df[StatusField.RUNTIME_SECONDS] + + hardware_charge_factors = df[StatusField.HARDWARE].apply( + _extract_qos_charge_factor, enum_class=HardwareOption + ) + qos_charge_factors = df[StatusField.QOS].apply( + _extract_qos_charge_factor, enum_class=QOSOption + ) + aus_used = ( + run_times_seconds / 3600 * hardware_charge_factors * qos_charge_factors + ) + return int(aus_used.sum()) + + +def _color_string(string): + """Color string value based on status option""" + if string == StatusOption.FAILED: + string = f"{Fore.RED}{string}{Style.RESET_ALL}" + elif string == StatusOption.SUCCESSFUL: + string = f"{Fore.GREEN}{string}{Style.RESET_ALL}" + elif string == StatusOption.RUNNING: + string = f"{Fore.BLUE}{string}{Style.RESET_ALL}" + else: + string = f"{Fore.YELLOW}{string}{Style.RESET_ALL}" + return string + + +def _print_intro(print_folder, commands, status, monitor_pid): + """Print intro including project folder and command/status filters.""" extras = [] commands = ", ".join(commands or []) if commands: @@ -116,52 +176,127 @@ def color_string(string): else: extras = "" - pid = df.monitor_pid - total_runtime_seconds = df.total_runtime_seconds - total_aus_used = int(df.total_aus_used) - walltime = df.walltime - name = f"\n{Fore.CYAN}{print_folder}{extras}{Style.RESET_ALL}:" - job_status = StatusField.JOB_STATUS - df[job_status.value] = df[job_status].apply(color_string) + print(f"\n{Fore.CYAN}{print_folder}{extras}{Style.RESET_ALL}:") + if monitor_pid is not None and psutil.pid_exists(monitor_pid): + print(f"{Fore.MAGENTA}MONITOR PID: {monitor_pid}{Style.RESET_ALL}") + + +def _print_df(df): + """Print main status body (table of job statuses)""" + df[JOB_STATUS_COL] = df[JOB_STATUS_COL].apply(_color_string) df = df.fillna("--") + + pdf = pd.concat([df, pd.DataFrame({JOB_STATUS_COL: [SEPARATING_LINE]})]) pdf = tabulate( - df, + pdf, showindex=True, # cspell:disable-line headers=df.columns, tablefmt="simple", # cspell:disable-line disable_numparse=[3], # cspell:disable-line ) - print(name) - if pid is not None and psutil.pid_exists(pid): - print(f"{Fore.MAGENTA}MONITOR PID: {pid}{Style.RESET_ALL}") + pdf = pdf.split("\n") + pdf[-1] = pdf[-1].replace(" ", "-") + pdf = "\n".join(pdf) print(pdf) - if total_runtime_seconds is not None: - runtime_str = ( - f"Total Runtime: {_elapsed_time_as_str(total_runtime_seconds)}" - ) - # TODO: make prettier - # divider = "".join(["-"] * len(runtime_str)) - # divider = "".join(["~"] * 3) - divider = "" - print(divider) - print(f"{Style.BRIGHT}{runtime_str}{Style.RESET_ALL}") - if walltime > 2: - walltime_str = ( - f"Total project wall time (including queue and downtime): " - f"{_elapsed_time_as_str(walltime)}" - ) - print(f"{Style.BRIGHT}{walltime_str}{Style.RESET_ALL}") - if total_aus_used > 0: - au_str = f"Total AUs spent: {total_aus_used:,}" - print(f"{Style.BRIGHT}{au_str}{Style.RESET_ALL}") - print( - f"{Style.BRIGHT}**Statistics only include shown jobs (excluding " - f"any previous runs)**{Style.RESET_ALL}" - ) - print() -def main_monitor(folder, commands, status, include): +def _print_job_status_statistics(df): + """Print job status statistics.""" + statistics_str = f"Total number of jobs: {df.shape[0]}" + counts = pd.DataFrame(df[JOB_STATUS_COL].value_counts()).reset_index() + counts = tabulate( + # pylint: disable=unsubscriptable-object + counts[["count", JOB_STATUS_COL]].values, + showindex=False, # cspell:disable-line + 
headers=counts.columns, + tablefmt="simple", # cspell:disable-line + disable_numparse=[1], # cspell:disable-line + ) + counts = "\n".join( + [f" {substr.lstrip()}" for substr in counts.split("\n")[2:]] + ) + print(f"{Style.BRIGHT}{statistics_str}{Style.RESET_ALL}") + print(counts) + + +def _print_runtime_stats(runtime_stats, total_runtime_seconds): + """Print node runtime statistics""" + + runtime_str = ( + f"Total node runtime: " + f"{_elapsed_time_as_str(total_runtime_seconds)}" + ) + print(f"{Style.BRIGHT}{runtime_str}{Style.RESET_ALL}") + if runtime_stats.empty: + return + + runtime_stats = tabulate( + runtime_stats, + showindex=False, # cspell:disable-line + headers=runtime_stats.columns, + tablefmt="simple", # cspell:disable-line + ) + runtime_stats = "\n".join( + [f" {substr}" for substr in runtime_stats.split("\n")[2:]] + ) + print(runtime_stats) + + +def _print_au_usage(total_aus_used): + """Print the job AU usage.""" + if total_aus_used <= 0: + return + + au_str = f"Total AUs spent: {total_aus_used:,}" + print(f"{Style.BRIGHT}{au_str}{Style.RESET_ALL}") + + +def _print_total_walltime(walltime): + """Print the total project walltime.""" + if walltime <= 2: + return + walltime_str = ( + f"Total project wall time (including queue and downtime " + f"between steps): {_elapsed_time_as_str(walltime)}" + ) + print(f"{Style.BRIGHT}{walltime_str}{Style.RESET_ALL}") + + +def _print_disclaimer(): + """Print disclaimer about statistics.""" + print( + f"{Style.BRIGHT}**Statistics only include shown jobs (excluding " + f"any previous runs or other steps)**{Style.RESET_ALL}" + ) + + +def _color_print( + df, + print_folder, + commands, + status, + walltime, + runtime_stats, + total_aus_used, + monitor_pid=None, + total_runtime_seconds=None, +): + """Color the status portion of the print out.""" + + _print_intro(print_folder, commands, status, monitor_pid) + _print_df(df) + _print_job_status_statistics(df) + + if total_runtime_seconds is None: + return + + _print_runtime_stats(runtime_stats, total_runtime_seconds) + _print_au_usage(total_aus_used) + _print_total_walltime(walltime) + _print_disclaimer() + + +def main_monitor(folder, commands, status, include, recursive): """Run the appropriate monitor functions for a folder. Parameters @@ -174,16 +309,22 @@ def main_monitor(folder, commands, status, include): Container with the statuses to display. include : container of str Container of extra keys to pull from the status files. + recursive : bool + Option to perform recursive search of directories. """ init() - folder = Path(folder).expanduser().resolve() - for directory in chain([folder], folder.rglob("*")): + folders = [Path(folder).expanduser().resolve()] + if recursive: + folders = chain(folders, folders[0].rglob("*")) + + for directory in folders: if not directory.is_dir(): continue pipe_status = Status(directory) if not pipe_status: + print(f"No non-empty status file found in {str(directory)!r}. 
") continue include_with_runtime = list(include) + [StatusField.RUNTIME_SECONDS] @@ -193,42 +334,28 @@ def main_monitor(folder, commands, status, include): if status: df = _filter_df_for_status(df, status) - run_times_seconds = df[StatusField.RUNTIME_SECONDS] - hardware_charge_factors = df[StatusField.HARDWARE].apply( - _extract_qos_charge_factor, enum_class=HardwareOption - ) - qos_charge_factors = df[StatusField.QOS].apply( - _extract_qos_charge_factor, enum_class=QOSOption - ) - aus_used = ( - run_times_seconds - / 3600 - * hardware_charge_factors - * qos_charge_factors - ) - df = df[list(df.columns)[:-1]] - df.monitor_pid = pipe_status.get(StatusField.MONITOR_PID) - df.total_runtime_seconds = run_times_seconds.sum() - df.total_aus_used = aus_used.sum() - - all_jobs_failed = ( - df[StatusField.JOB_STATUS] == StatusOption.FAILED - ).all() - all_end_times_missing = df[StatusField.TIME_END].isna().all() - if all_jobs_failed and all_end_times_missing: - df.walltime = 0 - else: - start_time = df[StatusField.TIME_SUBMITTED].fillna( - dt.datetime.now().strftime(DT_FMT) + if df.empty: + print( + f"No status data found to display for {str(directory)!r}. " + "Please check your filters and try again." ) - start_time = pd.to_datetime(start_time, format=DT_FMT).min() + continue - end_time = df[StatusField.TIME_END].fillna( - dt.datetime.now().strftime(DT_FMT) - ) - end_time = pd.to_datetime(end_time, format=DT_FMT).max() - df.walltime = (end_time - start_time).total_seconds() - _color_print(df, directory.name, commands, status) + runtime_stats = _calculate_runtime_stats(df) + aus_used = _calculate_aus(df) + walltime = _calculate_walltime(df) + + _color_print( + df[list(df.columns)[:-1]].copy(), + directory.name, + commands, + status, + walltime, + runtime_stats, + total_aus_used=aus_used, + monitor_pid=pipe_status.get(StatusField.MONITOR_PID), + total_runtime_seconds=df[StatusField.RUNTIME_SECONDS].sum(), + ) def status_command(): @@ -271,6 +398,13 @@ def status_command(): "this option (e.g. :code:`-i key1 -i key2 ...`) By default, no " "extra keys are displayed.", ), + click.Option( + param_decls=["--recursive", "-r"], + is_flag=True, + help="Option to perform a recursive search of directories " + "(starting with the input directory). The status of every nested " + "directory is reported.", + ), ] return _WrappedCommand( diff --git a/gaps/status.py b/gaps/status.py index 499627e4..e28c6866 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -761,7 +761,7 @@ def _elapsed_time_as_str(seconds_elapsed): hours, minutes = divmod(minutes, 60) time_str = f"{hours:d}:{minutes:02d}:{seconds:02d}" if days: - time_str = f"{days:d} day{'s' if abs(days) != 1 else ''}, {time_str}" + time_str = f"{days:,d} day{'s' if abs(days) != 1 else ''}, {time_str}" return time_str diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 2055dbd1..ed648e29 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -197,7 +197,7 @@ def preprocess_run_config(config, project_dir, out_dir): "collect-run not submitted", ] - for ind, partial in enumerate(expected_partial_lines[::-1], start=7): + for ind, partial in enumerate(expected_partial_lines[::-1], start=9): err_msg = f"{partial!r} not in {lines[-ind]!r}. 
All lines: {lines}" assert all( string in lines[-ind] for string in partial.split() @@ -243,94 +243,94 @@ def preprocess_run_config(config, project_dir, out_dir): assert_message_was_logged("is complete.", "INFO") -@pytest.mark.integration -def test_cli_monitor( - tmp_cwd, - cli_runner, - collect_pattern, - manual_collect, - runnable_script, - assert_message_was_logged, -): - """Integration test of `make_cli` with monitor""" - - data_dir, file_pattern = collect_pattern - - def preprocess_run_config(config, project_dir, out_dir): - assert project_dir == out_dir - config["dest_dir"] = str(project_dir) - config["source_dir"] = str(data_dir) - config["file_pattern"] = f"./{file_pattern}" - return config - - commands = [ - CLICommandFromFunction( - _copy_files, - name="run", - add_collect=True, - split_keys=["project_points"], - config_preprocessor=preprocess_run_config, - ) - ] - - main = make_cli(commands) - - assert not set(tmp_cwd.glob("*")) - cli_runner.invoke(main, ["template-configs"]) - files = set(tmp_cwd.glob("*")) - assert len(files) == 3 - for config_type in ["pipeline", "run", "collect_run"]: - assert tmp_cwd / f"config_{config_type}.json" in files - - pipe_config_fp = tmp_cwd / "config_pipeline.json" - run_config_fp = tmp_cwd / "config_run.json" - with open(run_config_fp, "r") as config_file: - config = json.load(config_file) - - assert config["project_points"] == CommandDocumentation.REQUIRED_TAG - exec_control = config["execution_control"] - assert exec_control["max_workers"] == CommandDocumentation.REQUIRED_TAG - assert exec_control["nodes"] == 1 - config["project_points"] = PROJECT_POINTS - config["execution_control"]["option"] = "local" - config["execution_control"]["max_workers"] = MAX_WORKERS - - with open(run_config_fp, "w") as config_file: - json.dump(config, config_file) - - assert not set(tmp_cwd.glob(file_pattern)) - assert tmp_cwd / "logs" not in set(tmp_cwd.glob("*")) - assert tmp_cwd / "chunk_files" not in set(tmp_cwd.glob("*")) - - cli_runner.invoke( - main, ["pipeline", "-c", pipe_config_fp.as_posix(), "--monitor"] - ) - assert len(set((tmp_cwd / "logs").glob("*run*"))) == 2 - assert len(set(tmp_cwd.glob(file_pattern))) == 1 - assert tmp_cwd / "logs" in set(tmp_cwd.glob("*")) - assert tmp_cwd / "chunk_files" in set(tmp_cwd.glob("*")) - assert len(set((tmp_cwd / "chunk_files").glob(file_pattern))) == 4 - - log_file = set((tmp_cwd / "logs").glob("*collect_run*")) - assert len(log_file) == 1 - - with open(log_file.pop(), "r") as log: - assert "DEBUG" not in log.read() - - h5_files = set(tmp_cwd.glob("*.h5")) - assert len(h5_files) == 1 - - with h5py.File(h5_files.pop(), "r") as collected_outputs: - assert len(collected_outputs.keys()) == 5 - assert "cf_mean" in collected_outputs - assert "lcoe_fcr" in collected_outputs - cf_profiles = collected_outputs["cf_profile"][...] 
- - profiles = manual_collect(data_dir / file_pattern, "cf_profile") - assert np.allclose(profiles, cf_profiles) - - assert_message_was_logged("Pipeline job", "INFO") - assert_message_was_logged("is complete.", "INFO") +# @pytest.mark.integration +# def test_cli_monitor( +# tmp_cwd, +# cli_runner, +# collect_pattern, +# manual_collect, +# runnable_script, +# assert_message_was_logged, +# ): +# """Integration test of `make_cli` with monitor""" + +# data_dir, file_pattern = collect_pattern + +# def preprocess_run_config(config, project_dir, out_dir): +# assert project_dir == out_dir +# config["dest_dir"] = str(project_dir) +# config["source_dir"] = str(data_dir) +# config["file_pattern"] = f"./{file_pattern}" +# return config + +# commands = [ +# CLICommandFromFunction( +# _copy_files, +# name="run", +# add_collect=True, +# split_keys=["project_points"], +# config_preprocessor=preprocess_run_config, +# ) +# ] + +# main = make_cli(commands) + +# assert not set(tmp_cwd.glob("*")) +# cli_runner.invoke(main, ["template-configs"]) +# files = set(tmp_cwd.glob("*")) +# assert len(files) == 3 +# for config_type in ["pipeline", "run", "collect_run"]: +# assert tmp_cwd / f"config_{config_type}.json" in files + +# pipe_config_fp = tmp_cwd / "config_pipeline.json" +# run_config_fp = tmp_cwd / "config_run.json" +# with open(run_config_fp, "r") as config_file: +# config = json.load(config_file) + +# assert config["project_points"] == CommandDocumentation.REQUIRED_TAG +# exec_control = config["execution_control"] +# assert exec_control["max_workers"] == CommandDocumentation.REQUIRED_TAG +# assert exec_control["nodes"] == 1 +# config["project_points"] = PROJECT_POINTS +# config["execution_control"]["option"] = "local" +# config["execution_control"]["max_workers"] = MAX_WORKERS + +# with open(run_config_fp, "w") as config_file: +# json.dump(config, config_file) + +# assert not set(tmp_cwd.glob(file_pattern)) +# assert tmp_cwd / "logs" not in set(tmp_cwd.glob("*")) +# assert tmp_cwd / "chunk_files" not in set(tmp_cwd.glob("*")) + +# cli_runner.invoke( +# main, ["pipeline", "-c", pipe_config_fp.as_posix(), "--monitor"] +# ) +# assert len(set((tmp_cwd / "logs").glob("*run*"))) == 2 +# assert len(set(tmp_cwd.glob(file_pattern))) == 1 +# assert tmp_cwd / "logs" in set(tmp_cwd.glob("*")) +# assert tmp_cwd / "chunk_files" in set(tmp_cwd.glob("*")) +# assert len(set((tmp_cwd / "chunk_files").glob(file_pattern))) == 4 + +# log_file = set((tmp_cwd / "logs").glob("*collect_run*")) +# assert len(log_file) == 1 + +# with open(log_file.pop(), "r") as log: +# assert "DEBUG" not in log.read() + +# h5_files = set(tmp_cwd.glob("*.h5")) +# assert len(h5_files) == 1 + +# with h5py.File(h5_files.pop(), "r") as collected_outputs: +# assert len(collected_outputs.keys()) == 5 +# assert "cf_mean" in collected_outputs +# assert "lcoe_fcr" in collected_outputs +# cf_profiles = collected_outputs["cf_profile"][...] 
+ +# profiles = manual_collect(data_dir / file_pattern, "cf_profile") +# assert np.allclose(profiles, cf_profiles) + +# assert_message_was_logged("Pipeline job", "INFO") +# assert_message_was_logged("is complete.", "INFO") @pytest.mark.skipif( diff --git a/tests/cli/test_cli_status.py b/tests/cli/test_cli_status.py index e0eb81d9..de70c11f 100644 --- a/tests/cli/test_cli_status.py +++ b/tests/cli/test_cli_status.py @@ -84,14 +84,14 @@ def test_status(test_data_dir, cli_runner, extra_args, monkeypatch): "gaps_test_run_j3 submitted local", "collect-run not submitted", ] - + start_ind = -(len(lines) - 1) for ind, partial in enumerate(expected_partial_lines): assert all( - string in lines[-16 + ind] for string in partial.split() + string in lines[start_ind + ind] for string in partial.split() ), partial - assert "Total Runtime" in lines[-6] - assert "Total project wall time" in lines[-5] + assert "Total node runtime" in lines[-5] + assert "Total project wall time" in lines[-3] assert lines[-4] == "Total AUs spent: 6" @@ -162,15 +162,16 @@ def test_status_with_hardware_check( "collect-run not submitted", ] + start_ind = -(len(lines) - 1) for ind, partial in enumerate(expected_partial_lines): assert all( - string in lines[-16 + ind] for string in partial.split() + string in lines[start_ind + ind] for string in partial.split() ), partial - if "failed" in lines[-16 + ind]: - assert not "(r)" in lines[-16 + ind] + if "failed" in lines[start_ind + ind]: + assert not "(r)" in lines[start_ind + ind] - assert "Total Runtime" in lines[-6] - assert "Total project wall time" in lines[-5] + assert "Total node runtime" in lines[-5] + assert "Total project wall time" in lines[-3] assert lines[-4] == "Total AUs spent: 6" @@ -253,12 +254,10 @@ def test_failed_run( "gaps_test_failed_run_j1 failed local high", "gaps_test_failed_run_j2 failed local unspecified", ] - if single_command: - start_ind = -12 - else: + if not single_command: expected_partial_lines += ["collect-run not submitted"] - start_ind = -14 + start_ind = -(len(lines) - 1) for ind, partial in enumerate(expected_partial_lines): assert all( string in lines[start_ind + ind] for string in partial.split() @@ -266,13 +265,34 @@ def test_failed_run( assert not "(r)" in lines[start_ind + ind] assert "Total AUs spent" not in lines[start_ind + ind] if single_command: - assert "Total project wall time" not in lines[start_ind + 10] + assert "Total project wall time" not in lines[start_ind + 12] if single_command: - assert "Total Runtime: 0:00:00" in lines[-4] + assert "Total node runtime: 0:00:00" in lines[-3] else: - assert "Total Runtime: 0:00:00" in lines[-5] - assert "Total project wall time" in lines[-4] + assert "Total node runtime: 0:00:00" in lines[-4] + assert "Total project wall time" in lines[-3] + + +def test_recursive_status(tmp_path, test_data_dir, cli_runner, monkeypatch): + """Test the status command for recursive directories.""" + + monkeypatch.setattr(psutil, "pid_exists", lambda *__: True, raising=True) + + status = status_command() + shutil.copytree(test_data_dir / "test_run", tmp_path / "test_run") + shutil.copytree( + test_data_dir / "test_failed_run", + tmp_path / "test_run" / "test_failed_run", + ) + run_dir = (tmp_path / "test_run").as_posix() + + result = cli_runner.invoke(status, [run_dir, "-r"]) + + lines = result.stdout.split("\n") + assert any(line == "test_run:" for line in lines) + assert any(line == "test_failed_run:" for line in lines) + assert len(lines) > 20 if __name__ == "__main__": From 
a3ad402ff3e16d09d5b63b1800081d4c7dd63606 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 19:01:32 -0600 Subject: [PATCH 56/61] Updated tests to be more robust against messages in result --- tests/cli/test_cli_status.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/cli/test_cli_status.py b/tests/cli/test_cli_status.py index de70c11f..e2365813 100644 --- a/tests/cli/test_cli_status.py +++ b/tests/cli/test_cli_status.py @@ -84,11 +84,11 @@ def test_status(test_data_dir, cli_runner, extra_args, monkeypatch): "gaps_test_run_j3 submitted local", "collect-run not submitted", ] - start_ind = -(len(lines) - 1) + start_ind = -21 for ind, partial in enumerate(expected_partial_lines): assert all( string in lines[start_ind + ind] for string in partial.split() - ), partial + ), f"{partial}, {lines[start_ind + ind:]}" assert "Total node runtime" in lines[-5] assert "Total project wall time" in lines[-3] @@ -162,11 +162,11 @@ def test_status_with_hardware_check( "collect-run not submitted", ] - start_ind = -(len(lines) - 1) + start_ind = -19 for ind, partial in enumerate(expected_partial_lines): assert all( string in lines[start_ind + ind] for string in partial.split() - ), partial + ), f"{partial}, {lines[start_ind + ind:]}" if "failed" in lines[start_ind + ind]: assert not "(r)" in lines[start_ind + ind] @@ -254,14 +254,17 @@ def test_failed_run( "gaps_test_failed_run_j1 failed local high", "gaps_test_failed_run_j2 failed local unspecified", ] - if not single_command: + + if single_command: + start_ind = -13 + else: + start_ind = -16 expected_partial_lines += ["collect-run not submitted"] - start_ind = -(len(lines) - 1) for ind, partial in enumerate(expected_partial_lines): assert all( string in lines[start_ind + ind] for string in partial.split() - ), partial + ), f"{partial}, {lines[start_ind + ind]:}" assert not "(r)" in lines[start_ind + ind] assert "Total AUs spent" not in lines[start_ind + ind] if single_command: From 5b805b1d341075517c0bd9c8dbb68b5c984de462 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 19:01:39 -0600 Subject: [PATCH 57/61] Bump version --- gaps/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gaps/version.py b/gaps/version.py index 5ffd5b6e..c6b69185 100644 --- a/gaps/version.py +++ b/gaps/version.py @@ -1,3 +1,3 @@ """GAPs Version Number. 
""" -__version__ = "0.3.4" +__version__ = "0.4.0" From 6966cfd86d48163c9760504647850db87265f261 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 21:58:50 -0600 Subject: [PATCH 58/61] Better str comparison in test --- tests/test_hpc.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_hpc.py b/tests/test_hpc.py index a88b07c4..d7dd7774 100644 --- a/tests/test_hpc.py +++ b/tests/test_hpc.py @@ -325,7 +325,9 @@ def test_submit(monkeypatch): assert not err out, err = submit(cmd_squeue, background=False, background_stdout=False) - assert out == SQUEUE_RAW.replace("\n", "\r\n") + out = out.replace("\n", "").replace("\r", "") + expected = SQUEUE_RAW.replace("\n", "").replace("\r", "") + assert out == expected assert not err with pytest.raises(OSError): From 8ccba102c9e0c02ef947053edcfeb0b116c1f9be Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 22:00:39 -0600 Subject: [PATCH 59/61] Install pytest with gaps --- .github/workflows/pr_rev_cli_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr_rev_cli_tests.yml b/.github/workflows/pr_rev_cli_tests.yml index ee0cbc9d..f3615af2 100644 --- a/.github/workflows/pr_rev_cli_tests.yml +++ b/.github/workflows/pr_rev_cli_tests.yml @@ -32,7 +32,7 @@ jobs: working-directory: ./gaps shell: bash -l {0} run: | - pip install -e . + pip install -e .[dev] - name: Run reV CLI tests working-directory: ./reV shell: bash -l {0} From da8ac58c137be8c26f144db736a4bf39d507436c Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 22:04:11 -0600 Subject: [PATCH 60/61] Increase wait limit for test --- tests/cli/test_cli.py | 178 +++++++++++++++++++++--------------------- 1 file changed, 89 insertions(+), 89 deletions(-) diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index ed648e29..1dc6fe0d 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -243,94 +243,94 @@ def preprocess_run_config(config, project_dir, out_dir): assert_message_was_logged("is complete.", "INFO") -# @pytest.mark.integration -# def test_cli_monitor( -# tmp_cwd, -# cli_runner, -# collect_pattern, -# manual_collect, -# runnable_script, -# assert_message_was_logged, -# ): -# """Integration test of `make_cli` with monitor""" - -# data_dir, file_pattern = collect_pattern - -# def preprocess_run_config(config, project_dir, out_dir): -# assert project_dir == out_dir -# config["dest_dir"] = str(project_dir) -# config["source_dir"] = str(data_dir) -# config["file_pattern"] = f"./{file_pattern}" -# return config - -# commands = [ -# CLICommandFromFunction( -# _copy_files, -# name="run", -# add_collect=True, -# split_keys=["project_points"], -# config_preprocessor=preprocess_run_config, -# ) -# ] - -# main = make_cli(commands) - -# assert not set(tmp_cwd.glob("*")) -# cli_runner.invoke(main, ["template-configs"]) -# files = set(tmp_cwd.glob("*")) -# assert len(files) == 3 -# for config_type in ["pipeline", "run", "collect_run"]: -# assert tmp_cwd / f"config_{config_type}.json" in files - -# pipe_config_fp = tmp_cwd / "config_pipeline.json" -# run_config_fp = tmp_cwd / "config_run.json" -# with open(run_config_fp, "r") as config_file: -# config = json.load(config_file) - -# assert config["project_points"] == CommandDocumentation.REQUIRED_TAG -# exec_control = config["execution_control"] -# assert exec_control["max_workers"] == CommandDocumentation.REQUIRED_TAG -# assert exec_control["nodes"] == 1 -# config["project_points"] = PROJECT_POINTS -# config["execution_control"]["option"] = 
"local" -# config["execution_control"]["max_workers"] = MAX_WORKERS - -# with open(run_config_fp, "w") as config_file: -# json.dump(config, config_file) - -# assert not set(tmp_cwd.glob(file_pattern)) -# assert tmp_cwd / "logs" not in set(tmp_cwd.glob("*")) -# assert tmp_cwd / "chunk_files" not in set(tmp_cwd.glob("*")) - -# cli_runner.invoke( -# main, ["pipeline", "-c", pipe_config_fp.as_posix(), "--monitor"] -# ) -# assert len(set((tmp_cwd / "logs").glob("*run*"))) == 2 -# assert len(set(tmp_cwd.glob(file_pattern))) == 1 -# assert tmp_cwd / "logs" in set(tmp_cwd.glob("*")) -# assert tmp_cwd / "chunk_files" in set(tmp_cwd.glob("*")) -# assert len(set((tmp_cwd / "chunk_files").glob(file_pattern))) == 4 - -# log_file = set((tmp_cwd / "logs").glob("*collect_run*")) -# assert len(log_file) == 1 - -# with open(log_file.pop(), "r") as log: -# assert "DEBUG" not in log.read() - -# h5_files = set(tmp_cwd.glob("*.h5")) -# assert len(h5_files) == 1 - -# with h5py.File(h5_files.pop(), "r") as collected_outputs: -# assert len(collected_outputs.keys()) == 5 -# assert "cf_mean" in collected_outputs -# assert "lcoe_fcr" in collected_outputs -# cf_profiles = collected_outputs["cf_profile"][...] - -# profiles = manual_collect(data_dir / file_pattern, "cf_profile") -# assert np.allclose(profiles, cf_profiles) - -# assert_message_was_logged("Pipeline job", "INFO") -# assert_message_was_logged("is complete.", "INFO") +@pytest.mark.integration +def test_cli_monitor( + tmp_cwd, + cli_runner, + collect_pattern, + manual_collect, + runnable_script, + assert_message_was_logged, +): + """Integration test of `make_cli` with monitor""" + + data_dir, file_pattern = collect_pattern + + def preprocess_run_config(config, project_dir, out_dir): + assert project_dir == out_dir + config["dest_dir"] = str(project_dir) + config["source_dir"] = str(data_dir) + config["file_pattern"] = f"./{file_pattern}" + return config + + commands = [ + CLICommandFromFunction( + _copy_files, + name="run", + add_collect=True, + split_keys=["project_points"], + config_preprocessor=preprocess_run_config, + ) + ] + + main = make_cli(commands) + + assert not set(tmp_cwd.glob("*")) + cli_runner.invoke(main, ["template-configs"]) + files = set(tmp_cwd.glob("*")) + assert len(files) == 3 + for config_type in ["pipeline", "run", "collect_run"]: + assert tmp_cwd / f"config_{config_type}.json" in files + + pipe_config_fp = tmp_cwd / "config_pipeline.json" + run_config_fp = tmp_cwd / "config_run.json" + with open(run_config_fp, "r") as config_file: + config = json.load(config_file) + + assert config["project_points"] == CommandDocumentation.REQUIRED_TAG + exec_control = config["execution_control"] + assert exec_control["max_workers"] == CommandDocumentation.REQUIRED_TAG + assert exec_control["nodes"] == 1 + config["project_points"] = PROJECT_POINTS + config["execution_control"]["option"] = "local" + config["execution_control"]["max_workers"] = MAX_WORKERS + + with open(run_config_fp, "w") as config_file: + json.dump(config, config_file) + + assert not set(tmp_cwd.glob(file_pattern)) + assert tmp_cwd / "logs" not in set(tmp_cwd.glob("*")) + assert tmp_cwd / "chunk_files" not in set(tmp_cwd.glob("*")) + + cli_runner.invoke( + main, ["pipeline", "-c", pipe_config_fp.as_posix(), "--monitor"] + ) + assert len(set((tmp_cwd / "logs").glob("*run*"))) == 2 + assert len(set(tmp_cwd.glob(file_pattern))) == 1 + assert tmp_cwd / "logs" in set(tmp_cwd.glob("*")) + assert tmp_cwd / "chunk_files" in set(tmp_cwd.glob("*")) + assert len(set((tmp_cwd / 
"chunk_files").glob(file_pattern))) == 4 + + log_file = set((tmp_cwd / "logs").glob("*collect_run*")) + assert len(log_file) == 1 + + with open(log_file.pop(), "r") as log: + assert "DEBUG" not in log.read() + + h5_files = set(tmp_cwd.glob("*.h5")) + assert len(h5_files) == 1 + + with h5py.File(h5_files.pop(), "r") as collected_outputs: + assert len(collected_outputs.keys()) == 5 + assert "cf_mean" in collected_outputs + assert "lcoe_fcr" in collected_outputs + cf_profiles = collected_outputs["cf_profile"][...] + + profiles = manual_collect(data_dir / file_pattern, "cf_profile") + assert np.allclose(profiles, cf_profiles) + + assert_message_was_logged("Pipeline job", "INFO") + assert_message_was_logged("is complete.", "INFO") @pytest.mark.skipif( @@ -406,7 +406,7 @@ def preprocess_run_config(config, project_dir, out_dir): if os.getpid() == status["monitor_pid"]: # Wait to die for __ in range(10): - time.sleep(10) + time.sleep(60) pytest.exit(0) assert ( From ce9fd126b43d7a1f80a8e9ef2a109b02c8e7f331 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 9 Aug 2023 22:09:32 -0600 Subject: [PATCH 61/61] Increase wait time in tests again --- tests/cli/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 1dc6fe0d..7f2fabb9 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -416,8 +416,8 @@ def preprocess_run_config(config, project_dir, out_dir): != StatusOption.SUCCESSFUL ) - for __ in range(6): - time.sleep(10) + for __ in range(10): + time.sleep(60) collect_status = Status.retrieve_job_status( tmp_cwd, "collect-run", f"{tmp_cwd.name}_collect_run" )