From 29573d43b17f97f90d93ce44773770754d7359de Mon Sep 17 00:00:00 2001 From: Brian Gunnarson Date: Mon, 20 Nov 2023 14:42:08 -0800 Subject: [PATCH 1/6] remove a merge conflict statement that was missed --- merlin/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/merlin/__init__.py b/merlin/__init__.py index dda10809c..c1ad21b22 100644 --- a/merlin/__init__.py +++ b/merlin/__init__.py @@ -38,11 +38,7 @@ import sys -<<<<<<< HEAD -__version__ = "1.10.2" -======= __version__ = "1.11.1" ->>>>>>> 38651f2650e8aba97552c4575e97d66be3205545 VERSION = __version__ PATH_TO_PROJ = os.path.join(os.path.dirname(__file__), "") From a8a4abe1352a6661c79a159ce8046ba36966d78a Mon Sep 17 00:00:00 2001 From: Brian Gunnarson Date: Wed, 5 Jun 2024 13:20:49 -0700 Subject: [PATCH 2/6] fix flux node allocation issue --- merlin/study/batch.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/merlin/study/batch.py b/merlin/study/batch.py index 4c8f3e49f..b8c7cc956 100644 --- a/merlin/study/batch.py +++ b/merlin/study/batch.py @@ -134,6 +134,14 @@ def get_node_count(default=1): the environment cannot be determined. :param returns: (int) The number of nodes to use. """ + + # If flux is the scheduler, we can get the size of the allocation with this + try: + get_size_proc = subprocess.run("flux getattr size", shell=True, capture_output=True, text=True) + return int(get_size_proc.stdout) + except Exception: + pass + if "SLURM_JOB_NUM_NODES" in os.environ: return int(os.environ["SLURM_JOB_NUM_NODES"]) From e1c5f96c3eea6621baac9f912ff2e02011089c32 Mon Sep 17 00:00:00 2001 From: Brian Gunnarson Date: Wed, 5 Jun 2024 13:22:21 -0700 Subject: [PATCH 3/6] allow for vars to be used with nodes settings of workers/batch --- merlin/spec/merlinspec.json | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/merlin/spec/merlinspec.json b/merlin/spec/merlinspec.json index 4b8ca3633..7e9c912e4 100644 --- a/merlin/spec/merlinspec.json +++ b/merlin/spec/merlinspec.json @@ -221,7 +221,8 @@ "nodes": { "anyOf": [ {"type": "null"}, - {"type": "integer", "minimum": 1} + {"type": "integer", "minimum": 1}, + {"type": "string", "pattern": "^\\$\\(\\w+\\)$"} ] }, "batch": { @@ -279,7 +280,12 @@ "launch_pre": {"type": "string", "minLength": 1}, "launch_args": {"type": "string", "minLength": 1}, "worker_launch": {"type": "string", "minLength": 1}, - "nodes": {"type": "integer", "minimum": 1}, + "nodes": { + "anyOf": [ + {"type": "integer", "minimum": 1}, + {"type": "string","pattern": "^\\$\\(\\w+\\)$"} + ] + }, "walltime": { "anyOf": [ {"type": "string", "minLength": 1}, From b57078ff6d2e3441cc8400e43e631e897ef5d011 Mon Sep 17 00:00:00 2001 From: Brian Gunnarson Date: Wed, 5 Jun 2024 13:22:39 -0700 Subject: [PATCH 4/6] add tests for var usage with nodes --- tests/integration/definitions.py | 8 ++++++++ tests/integration/test_specs/flux_par_native_test.yaml | 2 ++ 2 files changed, 10 insertions(+) diff --git a/tests/integration/definitions.py b/tests/integration/definitions.py index 51c7fe6b6..a1959b62b 100644 --- a/tests/integration/definitions.py +++ b/tests/integration/definitions.py @@ -295,6 +295,14 @@ def define_tests(): # pylint: disable=R0914,R0915 "conditions": [HasReturnCode(), HasRegex(r"default_worker", negate=True)], "run type": "local", }, + "run-workers echo variable for worker nodes": { + "cmds": f"{workers_flux} {flux_native} --echo", + "conditions": [ + HasReturnCode(), + HasRegex(r"-N 4") + ], + "run type": "local", + } } wf_format_tests = { "local minimum_format": { diff --git a/tests/integration/test_specs/flux_par_native_test.yaml b/tests/integration/test_specs/flux_par_native_test.yaml index 8eaf4b024..6fd9021a4 100644 --- a/tests/integration/test_specs/flux_par_native_test.yaml +++ b/tests/integration/test_specs/flux_par_native_test.yaml @@ -14,6 +14,7 @@ env: OUTPUT_PATH: ./studies N_SAMPLES: 10 SCRIPTS: $(SPECROOT)/../../../merlin/examples/workflows/flux/scripts + WORKER_NODES: 4 study: - description: Build the code @@ -71,6 +72,7 @@ merlin: simworkers: args: -l INFO --concurrency 1 --prefetch-multiplier 1 -Ofair steps: [runs, data] + nodes: $(WORKER_NODES) samples: column_labels: [V1, V2] file: $(MERLIN_INFO)/samples.npy From 420f95481a83cf6b50f177f35cc90696cf423a30 Mon Sep 17 00:00:00 2001 From: Brian Gunnarson Date: Wed, 5 Jun 2024 13:49:06 -0700 Subject: [PATCH 5/6] update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ca60ba17..1e1869e18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,12 +26,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Applying filters for `merlin detailed-status` will now log debug statements instead of warnings - Modified the unit tests for the `merlin status` command to use pytest rather than unittest - Added fixtures for `merlin status` tests that copy the workspace to a temporary directory so you can see exactly what's run in a test +- Batch block and workers now allow for variables to be used in node settings ### Fixed - Bugfix for output of `merlin example openfoam_wf_singularity` - A bug with the CHANGELOG detection test when the target branch isn't in the ci runner history - Link to Merlin banner in readme - Issue with escape sequences in ascii art (caught by python 3.12) +- Bug where Flux wasn't identifying total number of nodes on an allocation ## [1.12.1] From 056fe06b8f6b1afb29fa95c1c2c85e5a7bfc7ad2 Mon Sep 17 00:00:00 2001 From: Brian Gunnarson Date: Wed, 5 Jun 2024 13:56:36 -0700 Subject: [PATCH 6/6] run fix-style --- tests/integration/definitions.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/integration/definitions.py b/tests/integration/definitions.py index a1959b62b..59c1fa256 100644 --- a/tests/integration/definitions.py +++ b/tests/integration/definitions.py @@ -297,12 +297,9 @@ def define_tests(): # pylint: disable=R0914,R0915 }, "run-workers echo variable for worker nodes": { "cmds": f"{workers_flux} {flux_native} --echo", - "conditions": [ - HasReturnCode(), - HasRegex(r"-N 4") - ], + "conditions": [HasReturnCode(), HasRegex(r"-N 4")], "run type": "local", - } + }, } wf_format_tests = { "local minimum_format": {