diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ca60ba1..1e1869e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,12 +26,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Applying filters for `merlin detailed-status` will now log debug statements instead of warnings - Modified the unit tests for the `merlin status` command to use pytest rather than unittest - Added fixtures for `merlin status` tests that copy the workspace to a temporary directory so you can see exactly what's run in a test +- Batch block and workers now allow for variables to be used in node settings ### Fixed - Bugfix for output of `merlin example openfoam_wf_singularity` - A bug with the CHANGELOG detection test when the target branch isn't in the ci runner history - Link to Merlin banner in readme - Issue with escape sequences in ascii art (caught by python 3.12) +- Bug where Flux wasn't identifying the total number of nodes on an allocation ## [1.12.1] diff --git a/merlin/spec/merlinspec.json b/merlin/spec/merlinspec.json index 4b8ca363..7e9c912e 100644 --- a/merlin/spec/merlinspec.json +++ b/merlin/spec/merlinspec.json @@ -221,7 +221,8 @@ "nodes": { "anyOf": [ {"type": "null"}, - {"type": "integer", "minimum": 1} + {"type": "integer", "minimum": 1}, + {"type": "string", "pattern": "^\\$\\(\\w+\\)$"} ] }, "batch": { @@ -279,7 +280,12 @@ "launch_pre": {"type": "string", "minLength": 1}, "launch_args": {"type": "string", "minLength": 1}, "worker_launch": {"type": "string", "minLength": 1}, - "nodes": {"type": "integer", "minimum": 1}, + "nodes": { + "anyOf": [ + {"type": "integer", "minimum": 1}, + {"type": "string","pattern": "^\\$\\(\\w+\\)$"} + ] + }, "walltime": { "anyOf": [ {"type": "string", "minLength": 1}, diff --git a/merlin/study/batch.py b/merlin/study/batch.py index 4c8f3e49..b8c7cc95 100644 --- a/merlin/study/batch.py +++ b/merlin/study/batch.py @@ -134,6 +134,14 @@ def get_node_count(default=1): the environment cannot be determined. 
:param returns: (int) The number of nodes to use. """ + + # If flux is the scheduler, we can get the size of the allocation with this + try: + get_size_proc = subprocess.run("flux getattr size", shell=True, capture_output=True, text=True) + return int(get_size_proc.stdout) + except Exception: + pass + if "SLURM_JOB_NUM_NODES" in os.environ: return int(os.environ["SLURM_JOB_NUM_NODES"]) diff --git a/tests/integration/definitions.py b/tests/integration/definitions.py index 51c7fe6b..59c1fa25 100644 --- a/tests/integration/definitions.py +++ b/tests/integration/definitions.py @@ -295,6 +295,11 @@ def define_tests(): # pylint: disable=R0914,R0915 "conditions": [HasReturnCode(), HasRegex(r"default_worker", negate=True)], "run type": "local", }, + "run-workers echo variable for worker nodes": { + "cmds": f"{workers_flux} {flux_native} --echo", + "conditions": [HasReturnCode(), HasRegex(r"-N 4")], + "run type": "local", + }, } wf_format_tests = { "local minimum_format": { diff --git a/tests/integration/test_specs/flux_par_native_test.yaml b/tests/integration/test_specs/flux_par_native_test.yaml index 8eaf4b02..6fd9021a 100644 --- a/tests/integration/test_specs/flux_par_native_test.yaml +++ b/tests/integration/test_specs/flux_par_native_test.yaml @@ -14,6 +14,7 @@ env: OUTPUT_PATH: ./studies N_SAMPLES: 10 SCRIPTS: $(SPECROOT)/../../../merlin/examples/workflows/flux/scripts + WORKER_NODES: 4 study: - description: Build the code @@ -71,6 +72,7 @@ merlin: simworkers: args: -l INFO --concurrency 1 --prefetch-multiplier 1 -Ofair steps: [runs, data] + nodes: $(WORKER_NODES) samples: column_labels: [V1, V2] file: $(MERLIN_INFO)/samples.npy