Skip to content

Commit

Permalink
Merge pull request #1182 from hkmc-airlab/share-slurm-conf
Browse files Browse the repository at this point in the history
Have same slurm.conf among nodes and controller
  • Loading branch information
ajdecon authored Jun 17, 2022
2 parents 255c56e + 4c8bede commit 36cd63d
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 0 deletions.
3 changes: 3 additions & 0 deletions config.example/group_vars/slurm-cluster.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ slurm_enable_nfs_client_nodes: true
nfs_server_group: "slurm-master[0]"
nfs_client_group: "slurm-master[1:],slurm-node"

# Share one slurm.conf between the controller and the compute nodes.
# When true, the controller renders slurm.conf into slurmctl_config_dir and
# every node replaces slurm_config_dir/slurm.conf with a symbolic link to it.
# When false, each host renders its own copy into slurm_config_dir.
slurm_conf_symlink: false

################################################################################
# SOFTWARE MODULES (SM) #
# May be built with either EasyBuild or Spack #
Expand Down
4 changes: 4 additions & 0 deletions roles/slurm/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ slurm_src_url: "https://download.schedmd.com/slurm/slurm-{{ slurm_version }}.tar
slurm_build_make_clean: no
slurm_build_dir_cleanup: no
slurm_config_dir: /etc/slurm
# Directory where the controller writes slurm.conf when slurm_conf_symlink is
# true; slurm_config_dir/slurm.conf is then a symbolic link to the copy here.
# NOTE(review): presumably this path must live on storage shared with the
# compute nodes (e.g. the NFS export) - confirm.
slurmctl_config_dir: /sw/.slurm
slurm_sysconf_dir: /etc/sysconfig
slurm_install_prefix: /usr/local
slurm_configure: './configure --prefix={{ slurm_install_prefix }} --disable-dependency-tracking --disable-debug --disable-x11 --enable-really-no-cray --enable-salloc-kill-cmd --with-hdf5=no --sysconfdir={{ slurm_config_dir }} --enable-pam --with-pam_dir={{ slurm_pam_lib_dir }} --with-shared-libslurm --without-rpath --with-pmix={{ pmix_install_prefix }} --with-hwloc={{ hwloc_install_prefix }}'
Expand Down Expand Up @@ -54,6 +55,9 @@ slurm_cgroup_conf_template: "etc/slurm/cgroup.conf"
slurm_gres_conf_template: "etc/slurm/gres.conf"
slurm_dbd_conf_template: "etc/slurm/slurmdbd.conf"

# Role default for sharing one slurm.conf across controller and compute nodes.
# true:  slurm.conf is rendered once into slurmctl_config_dir and
#        slurm_config_dir/slurm.conf becomes a symbolic link to that file.
# false: slurm.conf is rendered directly into slurm_config_dir on each host.
slurm_conf_symlink: false

# Controls the ability of the partition to execute more than one job at a time on each resource (node, socket or core depending upon the value of SelectTypeParameters). If resources are to be over-subscribed, avoiding memory over-subscription is very important. SelectTypeParameters should be configured to treat memory as a consumable resource and the --mem option should be used for job allocations. Sharing of resources is typically useful only when using gang scheduling (PreemptMode=suspend,gang). Possible values for OverSubscribe are "EXCLUSIVE", "FORCE", "YES", and "NO". Note that a value of "YES" or "FORCE" can negatively impact performance for systems with many thousands of running jobs. The default value is "NO". For more information see the following web pages:
# Sets partition OverSubscribe
# To avoid sharing nodes, set to "EXCLUSIVE"
Expand Down
13 changes: 13 additions & 0 deletions roles/slurm/tasks/compute.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,19 @@
src: "{{ slurm_conf_template }}"
dest: "{{ slurm_config_dir }}/slurm.conf"
mode: "0644"
when: not slurm_conf_symlink
notify:
- restart slurmd
tags:
- config

- name: configure slurm.conf by symbolic link
file:
path: "{{ slurm_config_dir }}/slurm.conf"
state: link
src: "{{ slurmctl_config_dir }}/slurm.conf"
force: yes
when: slurm_conf_symlink
notify:
- restart slurmd
tags:
Expand Down
32 changes: 32 additions & 0 deletions roles/slurm/tasks/controller.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,16 @@
- /var/spool/slurm/ctld
- /var/log/slurm

# Create the directory that holds the shared slurm.conf. Only runs when
# slurm.conf is distributed through a symbolic link (slurm_conf_symlink).
- name: create slurm directories for nfs sharing
  file:
    path: "{{ item }}"
    state: directory
    owner: slurm
    # Quoted so the mode is always handled as an octal string, matching the
    # "0644" style used by the other tasks in this role.
    mode: "0755"
  with_items:
    - "{{ slurmctl_config_dir }}"
  when: slurm_conf_symlink

- name: create slurm HA directory
file:
path: "{{ slurm_ha_state_save_location }}"
Expand All @@ -89,6 +99,28 @@
src: "{{ slurm_conf_template }}"
dest: "{{ slurm_config_dir }}/slurm.conf"
mode: "0644"
when: not slurm_conf_symlink
notify:
- restart slurmctld
tags:
- config

# Render slurm.conf into the shared directory when symlink mode is enabled.
# The symbolic link in slurm_config_dir only reports "changed" when it is
# first created, so this task notifies the handler itself; otherwise later
# edits to slurm.conf would never restart slurmctld.
- name: configure slurm.conf to nfs
  template:
    src: "{{ slurm_conf_template }}"
    dest: "{{ slurmctl_config_dir }}/slurm.conf"
    mode: "0644"
  when: slurm_conf_symlink
  notify:
    - restart slurmctld
  tags:
    - config

- name: complete slurm.conf
file:
path: "{{ slurm_config_dir }}/slurm.conf"
state: link
src: "{{ slurmctl_config_dir }}/slurm.conf"
force: yes
when: slurm_conf_symlink
notify:
- restart slurmctld
tags:
Expand Down

0 comments on commit 36cd63d

Please sign in to comment.