diff --git a/README.md b/README.md index 18c5853f..f421e5e7 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,13 @@ used to supplement or override the template defaults. Templated parameters can also be removed by setting the value to the literal string `'omit'` - note that this is *not the same* as the Ansible `omit` [special variable](https://docs.ansible.com/ansible/latest/reference_appendices/special_variables.html#term-omit). +`openhpc_mpi_config`: Optional. Mapping of additional parameters and values for +[mpi.conf](https://slurm.schedmd.com/mpi.conf.html). Keys are mpi.conf +parameter names and values are lists or strings as appropriate. This can be +used to supplement or override the template defaults. Templated parameters can +also be removed by setting the value to the literal string `'omit'` - note +that this is *not the same* as the Ansible `omit` [special variable](https://docs.ansible.com/ansible/latest/reference_appendices/special_variables.html#term-omit). + `openhpc_ram_multiplier`: Optional, default `0.95`. Multiplier used in the calculation: `total_memory * openhpc_ram_multiplier` when setting `RealMemory` for the partition in slurm.conf. Can be overridden on a per partition basis using `openhpc_slurm_partitions.ram_multiplier`. Has no effect if `openhpc_slurm_partitions.ram_mb` is set. `openhpc_state_save_location`: Optional. 
Absolute path for Slurm controller state (`slurm.conf` parameter [StateSaveLocation](https://slurm.schedmd.com/slurm.conf.html#OPT_StateSaveLocation))
diff --git a/defaults/main.yml b/defaults/main.yml
index 8f597ec3..29e33adf 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -51,6 +51,9 @@ openhpc_cgroup_config: {}
 openhpc_gres_template: gres.conf.j2
 openhpc_cgroup_template: cgroup.conf.j2
 
+openhpc_mpi_template: mpi.conf.j2
+openhpc_mpi_config: {}
+
 openhpc_state_save_location: /var/spool/slurm
 openhpc_slurmd_spool_dir: /var/spool/slurm
 openhpc_slurm_conf_path: /etc/slurm/slurm.conf
diff --git a/tasks/runtime.yml b/tasks/runtime.yml
index 5edd5fc0..c2e30d45 100644
--- a/tasks/runtime.yml
+++ b/tasks/runtime.yml
@@ -103,6 +103,22 @@
   register: ohpc_cgroup_conf
   # NB uses restart rather than reload as this is needed in some cases
 
+# mpi.conf is written next to slurm.conf, only on hosts with
+# openhpc_enable.control set and only when openhpc_mpi_config is non-empty.
+- name: Template mpi.conf
+  template:
+    src: "{{ openhpc_mpi_template }}"
+    dest: "{{ openhpc_slurm_conf_path | dirname }}/mpi.conf"
+    owner: root
+    group: root
+    mode: "0644"
+  when:
+    - openhpc_enable.control | default(false)
+    - openhpc_mpi_config | length > 0
+  notify:
+    - Restart slurmctld service
+  register: ohpc_mpi_conf
+
 # Workaround for https://bugs.rockylinux.org/view.php?id=10165
 - name: Fix permissions on /etc for Munge service
   ansible.builtin.file:
@@ -134,6 +150,7 @@
   when:
     - openhpc_slurm_control_host in ansible_play_hosts
     - hostvars[openhpc_slurm_control_host].ohpc_slurm_conf.changed or
+      hostvars[openhpc_slurm_control_host].ohpc_mpi_conf.changed or
       hostvars[openhpc_slurm_control_host].ohpc_cgroup_conf.changed or
       hostvars[openhpc_slurm_control_host].ohpc_gres_conf.changed # noqa no-handler
   notify:
diff --git a/templates/mpi.conf.j2 b/templates/mpi.conf.j2
new file mode 100644
index 00000000..986b62cf
--- /dev/null
+++ b/templates/mpi.conf.j2
@@ -0,0 +1,6 @@
+# {{ ansible_managed }}
+{% for k, v in openhpc_mpi_config.items() %}
+{% if v != "omit" %}{# skip parameters whose value is the literal string "omit" #}
+{{ k }}={{ v | join(',') if (v is sequence and v is not string) else v }}
+{% endif %}
+{% endfor %}