diff --git a/CHANGELOG.md b/CHANGELOG.md index c0760da46..f994a4d05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,18 @@ All notable changes to Merlin will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.11.0] +### Added +- New reserved variable: + - `VLAUNCHER`: The same functionality as the `LAUNCHER` variable, but will substitute shell variables `MERLIN_NODES`, `MERLIN_PROCS`, `MERLIN_CORES`, and `MERLIN_GPUS` for nodes, procs, cores per task, and gpus + +### Changed +- Hardcoded Sphinx v5.3.0 requirement is now removed so we can use latest Sphinx + +### Fixed +- A bug where the filenames in iterative workflows kept appending `.out`, `.partial`, or `.expanded` to the filenames stored in the `merlin_info/` subdirectory +- A bug where a skewed sample hierarchy was created when a restart was necessary in the `add_merlin_expanded_chain_to_chord` task + ## [1.10.3] ### Added - The *.conf regex for the recursive-include of the merlin server directory so that pip will add it to the wheel diff --git a/Makefile b/Makefile index 74c407db0..4a857a217 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/docs/requirements.in b/docs/requirements.in new file mode 100644 index 000000000..268785121 --- /dev/null +++ b/docs/requirements.in @@ -0,0 +1,4 @@ +# This file will list all requirements for the docs so we can freeze a version of them for release. 
+# To freeze the versions run: +# pip-compile requirements.in +sphinx \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt index c771e60dc..5d3faecfe 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,56 +1 @@ -# -# This file is autogenerated by pip-compile with python 3.8 -# To update, run: -# -# pip-compile requirements.in -# -alabaster==0.7.12 - # via sphinx -babel==2.10.3 - # via sphinx -certifi==2023.7.22 - # via requests -charset-normalizer==2.1.1 - # via requests -docutils==0.17.1 - # via sphinx -idna==3.4 - # via requests -imagesize==1.4.1 - # via sphinx -importlib-metadata==5.0.0 - # via sphinx -jinja2==3.0.3 - # via sphinx -markupsafe==2.1.1 - # via jinja2 -packaging==21.3 - # via sphinx -pygments==2.15.0 - # via sphinx -pyparsing==3.0.9 - # via packaging -pytz==2022.5 - # via babel -requests==2.31.0 - # via sphinx -snowballstemmer==2.2.0 - # via sphinx -sphinx==5.3.0 - # via -r requirements.in -sphinxcontrib-applehelp==1.0.2 - # via sphinx -sphinxcontrib-devhelp==1.0.2 - # via sphinx -sphinxcontrib-htmlhelp==2.0.0 - # via sphinx -sphinxcontrib-jsmath==1.0.1 - # via sphinx -sphinxcontrib-qthelp==1.0.3 - # via sphinx -sphinxcontrib-serializinghtml==1.1.5 - # via sphinx -urllib3==1.26.12 - # via requests -zipp==3.10.0 - # via importlib-metadata +sphinx>=5.3.0 diff --git a/docs/source/merlin_variables.rst b/docs/source/merlin_variables.rst index d67e06412..f8ea7fce7 100644 --- a/docs/source/merlin_variables.rst +++ b/docs/source/merlin_variables.rst @@ -160,8 +160,9 @@ Reserved variables $(MERLIN_INFO)/*.expanded.yaml -The ``LAUNCHER`` Variable -+++++++++++++++++++++++++ + +The ``LAUNCHER`` and ``VLAUNCHER`` Variables ++++++++++++++++++++++++++++++++++++++++++++++++ ``$(LAUNCHER)`` is a special case of a reserved variable since it's value *can* be changed. 
It serves as an abstraction to launch a job with parallel schedulers like :ref:`slurm`, @@ -187,6 +188,54 @@ We can modify this to use the ``$(LAUNCHER)`` variable like so: In other words, the ``$(LAUNCHER)`` variable would become ``srun -N 1 -n 3``. +The ``$(VLAUNCHER)`` variable behaves similarly to the ``$(LAUNCHER)`` variable. +The key distinction lies in its source of information. Instead of drawing certain configuration +options from the ``run`` section of a step, it retrieves specific shell variables. These shell +variables are automatically generated by Merlin when you include the ``$(VLAUNCHER)`` variable +in a step command, but they can also be customized by the user. Currently, the following shell +variables are supported: + +.. list-table:: VLAUNCHER Variables + :widths: 25 50 25 + :header-rows: 1 + + * - Variable + - Description + - Default + + * - ``${MERLIN_NODES}`` + - The number of nodes + - 1 + + * - ``${MERLIN_PROCS}`` + - The number of tasks/procs + - 1 + + * - ``${MERLIN_CORES}`` + - The number of cores per task/proc + - 1 + + * - ``${MERLIN_GPUS}`` + - The number of gpus per task/proc + - 0 + +Let's say we have the following defined in our yaml file: + +.. code:: yaml + + batch: + type: flux + + run: + cmd: | + MERLIN_NODES=4 + MERLIN_PROCS=2 + MERLIN_CORES=8 + MERLIN_GPUS=2 + $(VLAUNCHER) python script.py + +The ``$(VLAUNCHER)`` variable would be substituted with ``flux run -N 4 -n 2 -c 8 -g 2``. + User variables ------------------- Variables defined by a specification file in the ``env`` section, as in this example: diff --git a/merlin/__init__.py b/merlin/__init__.py index 12ba225cd..20a0e8b3e 100644 --- a/merlin/__init__.py +++ b/merlin/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. 
# @@ -38,7 +38,7 @@ import sys -__version__ = "1.10.3" +__version__ = "1.11.0" VERSION = __version__ PATH_TO_PROJ = os.path.join(os.path.dirname(__file__), "") diff --git a/merlin/ascii_art.py b/merlin/ascii_art.py index b56da4d7a..f823937a6 100644 --- a/merlin/ascii_art.py +++ b/merlin/ascii_art.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/celery.py b/merlin/celery.py index 9921bbb89..95f26530e 100644 --- a/merlin/celery.py +++ b/merlin/celery.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/__init__.py b/merlin/common/__init__.py index 2a6208883..d6f53d03d 100644 --- a/merlin/common/__init__.py +++ b/merlin/common/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/abstracts/__init__.py b/merlin/common/abstracts/__init__.py index 2a6208883..d6f53d03d 100644 --- a/merlin/common/abstracts/__init__.py +++ b/merlin/common/abstracts/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/abstracts/enums/__init__.py b/merlin/common/abstracts/enums/__init__.py index a90133b73..7b8ab80f5 100644 --- a/merlin/common/abstracts/enums/__init__.py +++ b/merlin/common/abstracts/enums/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. 
+# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/openfilelist.py b/merlin/common/openfilelist.py index 00aaea917..124c7851d 100644 --- a/merlin/common/openfilelist.py +++ b/merlin/common/openfilelist.py @@ -8,7 +8,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/opennpylib.py b/merlin/common/opennpylib.py index 0f7607a8e..a8f8dffb2 100644 --- a/merlin/common/opennpylib.py +++ b/merlin/common/opennpylib.py @@ -8,7 +8,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/sample_index.py b/merlin/common/sample_index.py index 4f7333f6d..149d52e13 100644 --- a/merlin/common/sample_index.py +++ b/merlin/common/sample_index.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/sample_index_factory.py b/merlin/common/sample_index_factory.py index 55601073e..dc13d41d1 100644 --- a/merlin/common/sample_index_factory.py +++ b/merlin/common/sample_index_factory.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/security/__init__.py b/merlin/common/security/__init__.py index 2a6208883..d6f53d03d 100644 --- a/merlin/common/security/__init__.py +++ b/merlin/common/security/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. 
-# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/security/encrypt.py b/merlin/common/security/encrypt.py index 1059383d9..125ec5bed 100644 --- a/merlin/common/security/encrypt.py +++ b/merlin/common/security/encrypt.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/security/encrypt_backend_traffic.py b/merlin/common/security/encrypt_backend_traffic.py index cee757b91..68e178b77 100644 --- a/merlin/common/security/encrypt_backend_traffic.py +++ b/merlin/common/security/encrypt_backend_traffic.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/tasks.py b/merlin/common/tasks.py index 56051756b..fbd401826 100644 --- a/merlin/common/tasks.py +++ b/merlin/common/tasks.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. 
# @@ -298,27 +298,33 @@ def add_merlin_expanded_chain_to_chord( # pylint: disable=R0913,R0914 LOG.debug("chain added to chord") else: # recurse down the sample_index hierarchy - LOG.debug("recursing down sample_index hierarchy") - for next_index in sample_index.children.values(): - next_index.name = os.path.join(sample_index.name, next_index.name) - LOG.debug("generating next step") - next_step = add_merlin_expanded_chain_to_chord.s( - task_type, - chain_, - samples[next_index.min - min_sample_id : next_index.max - min_sample_id], - labels, - next_index, - adapter_config, - next_index.min, - ) - next_step.set(queue=chain_[0].get_task_queue()) - LOG.debug(f"recursing with range {next_index.min}:{next_index.max}, {next_index.name} {signature(next_step)}") - LOG.debug(f"queuing samples[{next_index.min}:{next_index.max}] in for {chain_} in {next_index.name}...") - if self.request.is_eager: - next_step.delay() - else: - self.add_to_chord(next_step, lazy=False) - LOG.debug(f"queued for samples[{next_index.min}:{next_index.max}] in for {chain_} in {next_index.name}") + try: + LOG.debug("recursing down sample_index hierarchy") + for next_index in sample_index.children.values(): + next_index_name_before = next_index.name + next_index.name = os.path.join(sample_index.name, next_index.name) + LOG.debug("generating next step") + next_step = add_merlin_expanded_chain_to_chord.s( + task_type, + chain_, + samples[next_index.min - min_sample_id : next_index.max - min_sample_id], + labels, + next_index, + adapter_config, + next_index.min, + ) + next_step.set(queue=chain_[0].get_task_queue()) + LOG.debug(f"recursing with range {next_index.min}:{next_index.max}, {next_index.name} {signature(next_step)}") + LOG.debug(f"queuing samples[{next_index.min}:{next_index.max}] in for {chain_} in {next_index.name}...") + if self.request.is_eager: + next_step.delay() + else: + self.add_to_chord(next_step, lazy=False) + LOG.debug(f"queued for samples[{next_index.min}:{next_index.max}] in for 
{chain_} in {next_index.name}") + except retry_exceptions as e: + # Reset the index to what it was before so we don't accidentally create a bunch of extra samples upon restart + next_index.name = next_index_name_before + raise e return ReturnCode.OK diff --git a/merlin/common/util_sampling.py b/merlin/common/util_sampling.py index 8137e0543..c29763485 100644 --- a/merlin/common/util_sampling.py +++ b/merlin/common/util_sampling.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/config/__init__.py b/merlin/config/__init__.py index 7af320b52..0594ffe45 100644 --- a/merlin/config/__init__.py +++ b/merlin/config/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/config/broker.py b/merlin/config/broker.py index 78658333a..fe49ff162 100644 --- a/merlin/config/broker.py +++ b/merlin/config/broker.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/config/celeryconfig.py b/merlin/config/celeryconfig.py index e688945cc..0ff305962 100644 --- a/merlin/config/celeryconfig.py +++ b/merlin/config/celeryconfig.py @@ -10,7 +10,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. 
# diff --git a/merlin/config/configfile.py b/merlin/config/configfile.py index d46a1d038..1f3418377 100644 --- a/merlin/config/configfile.py +++ b/merlin/config/configfile.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/config/results_backend.py b/merlin/config/results_backend.py index a619ecb03..d3e7002e7 100644 --- a/merlin/config/results_backend.py +++ b/merlin/config/results_backend.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/config/utils.py b/merlin/config/utils.py index 1385c4f35..65fc6f85c 100644 --- a/merlin/config/utils.py +++ b/merlin/config/utils.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/data/celery/__init__.py b/merlin/data/celery/__init__.py index 2a6208883..d6f53d03d 100644 --- a/merlin/data/celery/__init__.py +++ b/merlin/data/celery/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/display.py b/merlin/display.py index f59255ddb..a0470938c 100644 --- a/merlin/display.py +++ b/merlin/display.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. 
# diff --git a/merlin/examples/__init__.py b/merlin/examples/__init__.py index 2a6208883..d6f53d03d 100644 --- a/merlin/examples/__init__.py +++ b/merlin/examples/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/examples/examples.py b/merlin/examples/examples.py index 78152e6ee..1d756f00e 100644 --- a/merlin/examples/examples.py +++ b/merlin/examples/examples.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/examples/generator.py b/merlin/examples/generator.py index 5dbe2ebf5..294787857 100644 --- a/merlin/examples/generator.py +++ b/merlin/examples/generator.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/exceptions/__init__.py b/merlin/exceptions/__init__.py index ed7156aa5..cf272d93b 100644 --- a/merlin/exceptions/__init__.py +++ b/merlin/exceptions/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/log_formatter.py b/merlin/log_formatter.py index 6a6da63d8..3fba8cfc8 100644 --- a/merlin/log_formatter.py +++ b/merlin/log_formatter.py @@ -8,7 +8,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. 
# diff --git a/merlin/main.py b/merlin/main.py index a29546798..198cf3804 100644 --- a/merlin/main.py +++ b/merlin/main.py @@ -8,7 +8,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/merlin_templates.py b/merlin/merlin_templates.py index 477887794..7936db03b 100644 --- a/merlin/merlin_templates.py +++ b/merlin/merlin_templates.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/router.py b/merlin/router.py index 465f0ad3d..476ab1c0f 100644 --- a/merlin/router.py +++ b/merlin/router.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/server/__init__.py b/merlin/server/__init__.py index 88f37fd2c..d04c75d72 100644 --- a/merlin/server/__init__.py +++ b/merlin/server/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. diff --git a/merlin/server/server_commands.py b/merlin/server/server_commands.py index 45e6ef3d3..45411131b 100644 --- a/merlin/server/server_commands.py +++ b/merlin/server/server_commands.py @@ -8,7 +8,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. 
# diff --git a/merlin/server/server_config.py b/merlin/server/server_config.py index 5f109f7c8..414f7a407 100644 --- a/merlin/server/server_config.py +++ b/merlin/server/server_config.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/server/server_util.py b/merlin/server/server_util.py index a280abac5..2b8f1216d 100644 --- a/merlin/server/server_util.py +++ b/merlin/server/server_util.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/spec/__init__.py b/merlin/spec/__init__.py index 2a6208883..d6f53d03d 100644 --- a/merlin/spec/__init__.py +++ b/merlin/spec/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/spec/all_keys.py b/merlin/spec/all_keys.py index 7f9f66188..556f5924e 100644 --- a/merlin/spec/all_keys.py +++ b/merlin/spec/all_keys.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/spec/defaults.py b/merlin/spec/defaults.py index c4aad952c..8972d5cfe 100644 --- a/merlin/spec/defaults.py +++ b/merlin/spec/defaults.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. 
# @@ -52,3 +52,11 @@ "generate": {"cmd": "echo 'Insert sample-generating command here'"}, "level_max_dirs": 25, } + +# Values of the form (step key to search for, default value if no step key found) +VLAUNCHER_VARS = { + "MERLIN_NODES": ("nodes", 1), + "MERLIN_PROCS": ("procs", 1), + "MERLIN_CORES": ("cores per task", 1), + "MERLIN_GPUS": ("gpus", 0), +} diff --git a/merlin/spec/expansion.py b/merlin/spec/expansion.py index e29f0e7b5..381bc72f4 100644 --- a/merlin/spec/expansion.py +++ b/merlin/spec/expansion.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/spec/override.py b/merlin/spec/override.py index 50d4f1c35..f3192a38e 100644 --- a/merlin/spec/override.py +++ b/merlin/spec/override.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/spec/specification.py b/merlin/spec/specification.py index eb165b617..32fe0f635 100644 --- a/merlin/spec/specification.py +++ b/merlin/spec/specification.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. 
# @@ -45,7 +45,7 @@ from maestrowf.specification import YAMLSpecification from merlin.spec import all_keys, defaults -from merlin.utils import repr_timedelta +from merlin.utils import find_vlaunch_var, repr_timedelta LOG = logging.getLogger(__name__) @@ -369,6 +369,17 @@ def process_spec_defaults(self): defaults.STUDY_STEP_RUN["shell"] = self.batch["shell"] for step in self.study: MerlinSpec.fill_missing_defaults(step["run"], defaults.STUDY_STEP_RUN) + # Insert VLAUNCHER specific variables if necessary + if "$(VLAUNCHER)" in step["run"]["cmd"]: + SHSET = "" + if "csh" in step["run"]["shell"]: + SHSET = "set " + # We need to set default values for VLAUNCHER variables if they're not defined by the user + for vlaunch_var, vlaunch_val in defaults.VLAUNCHER_VARS.items(): + if not find_vlaunch_var(vlaunch_var.replace("MERLIN_", ""), step["run"]["cmd"], accept_no_matches=True): + # Look for predefined nodes/procs/cores/gpus values in the step and default to those + vlaunch_val = step["run"][vlaunch_val[0]] if vlaunch_val[0] in step["run"] else vlaunch_val[1] + step["run"]["cmd"] = f"{SHSET}{vlaunch_var}={vlaunch_val}\n" + step["run"]["cmd"] # fill in missing merlin section defaults MerlinSpec.fill_missing_defaults(self.merlin, defaults.MERLIN["merlin"]) diff --git a/merlin/study/__init__.py b/merlin/study/__init__.py index 2a6208883..d6f53d03d 100644 --- a/merlin/study/__init__.py +++ b/merlin/study/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/study/batch.py b/merlin/study/batch.py index 4298fca32..e02a65a32 100644 --- a/merlin/study/batch.py +++ b/merlin/study/batch.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. 
def setup_vlaunch(step_run: dict, batch_type: str, gpu_config: bool) -> None:
    """
    Check for the VLAUNCHER keyword in the step run settings, find
    the MERLIN variables and configure VLAUNCHER.

    When `$(VLAUNCHER)` appears in `step_run["cmd"]` it is rewritten to
    `$(LAUNCHER)` and the "nodes", "procs" and "cores per task" entries of
    `step_run` are set to whatever `find_vlaunch_var` returns for NODES,
    PROCS and CORES. `step_run` is modified in place; nothing is touched
    when the keyword is absent.

    :param `step_run`: the step.run mapping; its "cmd" entry is read and updated
    :param `batch_type`: the batch type string (only used in the warning message)
    :param `gpu_config`: bool to determine if gpus should be configured
    :returns: None
    :raises ValueError: propagated from `find_vlaunch_var` when NODES, PROCS
                        or CORES is not defined in the step cmd
    """
    if "$(VLAUNCHER)" not in step_run["cmd"]:
        return

    # From here on the step is handled like a regular $(LAUNCHER) step
    step_run["cmd"] = step_run["cmd"].replace("$(VLAUNCHER)", "$(LAUNCHER)")
    cmd = step_run["cmd"]

    step_run["nodes"] = find_vlaunch_var("NODES", cmd)
    step_run["procs"] = find_vlaunch_var("PROCS", cmd)
    step_run["cores per task"] = find_vlaunch_var("CORES", cmd)

    # GPUS is optional: look it up once and tolerate its absence, otherwise
    # the lookup raises and the truthiness check below could never be False.
    gpus = find_vlaunch_var("GPUS", cmd, accept_no_matches=True)
    if gpus:
        if gpu_config:
            step_run["gpus"] = gpus
        else:
            LOG.warning(f"Merlin does not yet have the ability to set GPUs per task with {batch_type}. Coming soon.")
This is necessary for the VLAUNCHER to work. + + :param `ws_path`: the path to the workspace where we'll write the scripts + :param `step`: the Maestro StudyStep object containing info for our step + :returns: a tuple containing: + - a boolean representing whether this step is to be scheduled or not + - Merlin can ignore this + - a path to the script for the cmd + - a path to the script for the restart cmd + """ + setup_vlaunch(step.run, "slurm", False) + + return super().write_script(ws_path, step) + class MerlinFluxScriptAdapter(MerlinSlurmScriptAdapter): """ @@ -319,6 +377,23 @@ def time_format(self, val): """ return convert_timestring(val, format_method="FSD") + def write_script(self, ws_path, step): + """ + This will overwrite the write_script in method from Maestro's base ScriptAdapter + class but will eventually call it. This is necessary for the VLAUNCHER to work. + + :param `ws_path`: the path to the workspace where we'll write the scripts + :param `step`: the Maestro StudyStep object containing info for our step + :returns: a tuple containing: + - a boolean representing whether this step is to be scheduled or not + - Merlin can ignore this + - a path to the script for the cmd + - a path to the script for the restart cmd + """ + setup_vlaunch(step.run, "flux", True) + + return super().write_script(ws_path, step) + class MerlinScriptAdapter(LocalScriptAdapter): """ diff --git a/merlin/study/step.py b/merlin/study/step.py index bdba0250d..5d877ba4f 100644 --- a/merlin/study/step.py +++ b/merlin/study/step.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/study/study.py b/merlin/study/study.py index 8f4ddb19d..b9ada35ea 100644 --- a/merlin/study/study.py +++ b/merlin/study/study.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. 
def find_vlaunch_var(vlaunch_var: str, step_cmd: str, accept_no_matches=False) -> Union[str, None]:
    """
    Given a variable used for VLAUNCHER and the step cmd value, find
    the variable.

    A definition is any line of `step_cmd` that is not a comment and contains
    `MERLIN_<vlaunch_var>=<digits>`. Comment lines are skipped even when the
    `#` is indented, and names that merely end in `MERLIN_<vlaunch_var>`
    (e.g. `MY_MERLIN_NODES`) are not counted.

    :param `vlaunch_var`: The name of the VLAUNCHER variable (without MERLIN_)
    :param `step_cmd`: The string for the cmd of a step
    :param `accept_no_matches`: If True, return None if we couldn't find the variable. Otherwise, raise an error.
    :returns: the shell reference `${MERLIN_<vlaunch_var>}` if a definition is found, otherwise None
    :raises ValueError: if no definition is found and `accept_no_matches` is False
    """
    # `(?![ \t]*#)` rejects commented-out lines; the leading `.*` keeps forms such as
    # `set MERLIN_X=1` matching, and `\b` stops `FOO_MERLIN_X=1` from counting.
    pattern = rf"^(?![ \t]*#).*\bMERLIN_{re.escape(vlaunch_var)}=\d+"

    if re.search(pattern, step_cmd, re.MULTILINE):
        return f"${{MERLIN_{vlaunch_var}}}"

    if accept_no_matches:
        return None
    raise ValueError(f"VLAUNCHER used but could not find MERLIN_{vlaunch_var} in the step.")
diff --git a/setup.py b/setup.py index d409c0a74..7c91d26c7 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/tests/integration/conditions.py b/tests/integration/conditions.py index 81c063112..b25010ca2 100644 --- a/tests/integration/conditions.py +++ b/tests/integration/conditions.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index bc19fd9c8..58460e18f 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/tests/integration/test_definitions.py b/tests/integration/test_definitions.py index 0fdedf07b..f59acf237 100644 --- a/tests/integration/test_definitions.py +++ b/tests/integration/test_definitions.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.10.3. +# This file is part of Merlin, Version: 1.11.0. # # For details, see https://github.com/LLNL/merlin. 
# @@ -407,13 +407,81 @@ def define_tests(): # pylint: disable=R0914,R0915 }, "dry launch flux": { "cmds": f"{run} {flux} --dry --local --no-errors --vars N_SAMPLES=2 OUTPUT_PATH=./{OUTPUT_DIR}", - "conditions": StepFileHasRegex( - "runs", - "*/runs.slurm.sh", - "flux_test", - OUTPUT_DIR, - get_flux_cmd("flux", no_errors=True), - ), + "conditions": [ + StepFileHasRegex( + "runs", + "*/runs.slurm.sh", + "flux_test", + OUTPUT_DIR, + get_flux_cmd("flux", no_errors=True), + ), + ################## + # VLAUNCHER TESTS + ################## + StepFileHasRegex( + "vlauncher_test", + "vlauncher_test.slurm.sh", + "flux_test", + OUTPUT_DIR, + r"flux run -n \$\{MERLIN_PROCS\} -N \$\{MERLIN_NODES\} -c \$\{MERLIN_CORES\}", + ), + StepFileHasRegex( + "vlauncher_test_step_defaults", + "vlauncher_test_step_defaults.slurm.sh", + "flux_test", + OUTPUT_DIR, + r"MERLIN_GPUS=1", + ), + StepFileHasRegex( + "vlauncher_test_step_defaults", + "vlauncher_test_step_defaults.slurm.sh", + "flux_test", + OUTPUT_DIR, + r"MERLIN_NODES=6", + ), + StepFileHasRegex( + "vlauncher_test_step_defaults", + "vlauncher_test_step_defaults.slurm.sh", + "flux_test", + OUTPUT_DIR, + r"MERLIN_PROCS=3", + ), + StepFileHasRegex( + "vlauncher_test_step_defaults", + "vlauncher_test_step_defaults.slurm.sh", + "flux_test", + OUTPUT_DIR, + r"MERLIN_CORES=2", + ), + StepFileHasRegex( + "vlauncher_test_no_step_defaults", + "vlauncher_test_no_step_defaults.slurm.sh", + "flux_test", + OUTPUT_DIR, + r"MERLIN_GPUS=0", + ), + StepFileHasRegex( + "vlauncher_test_no_step_defaults", + "vlauncher_test_no_step_defaults.slurm.sh", + "flux_test", + OUTPUT_DIR, + r"MERLIN_NODES=1", + ), + StepFileHasRegex( + "vlauncher_test_no_step_defaults", + "vlauncher_test_no_step_defaults.slurm.sh", + "flux_test", + OUTPUT_DIR, + r"MERLIN_PROCS=1", + ), + StepFileHasRegex( + "vlauncher_test_no_step_defaults", + "vlauncher_test_no_step_defaults.slurm.sh", + "flux_test", + OUTPUT_DIR, + r"MERLIN_CORES=1", + ), + ], "run type": "local", }, "dry 
launch lsf": { diff --git a/tests/integration/test_specs/flux_test.yaml b/tests/integration/test_specs/flux_test.yaml index fe0130526..99f15205c 100644 --- a/tests/integration/test_specs/flux_test.yaml +++ b/tests/integration/test_specs/flux_test.yaml @@ -33,6 +33,34 @@ study: depends: [runs*] task_queue: flux_test +- description: step that uses vlauncher + name: vlauncher_test + run: + cmd: | + MERLIN_NODES=6 + MERLIN_PROCS=3 + MERLIN_CORES=2 + $(VLAUNCHER) echo "step that uses vlauncher" + task_queue: flux_test + +- description: test vlauncher step defaults + name: vlauncher_test_step_defaults + run: + cmd: | + $(VLAUNCHER) echo "test vlauncher step defaults" + task_queue: flux_test + nodes: 6 + procs: 3 + cores per task: 2 + gpus: 1 + +- description: test vlauncher no step defaults + name: vlauncher_test_no_step_defaults + run: + cmd: | + $(VLAUNCHER) echo "test vlauncher no step defaults" + task_queue: flux_test + global.parameters: STUDY: label: STUDY.%%