Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BREAKING: Support SLURM 21.08 and breaking changes (see description) #32

Merged
merged 9 commits into from
Dec 2, 2021
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ This module is designed to work with SLURM 20.11.x.
| --------------| -----------------------------|
| 20.02.x | 0.x |
| 20.11.x | 1.x |
| 21.08.x | 2.x |

## Usage

Expand Down Expand Up @@ -182,6 +183,12 @@ slurm::database: false
slurm::slurmctld: true
```

If you wish to enable configless SLURM:

```yaml
slurm::enable_configless: true
```

### Role: slurmd

The following enables a host to act as a slurmd compute node
Expand All @@ -194,6 +201,12 @@ slurm::slurmctld: false
slurm::slurmd: true
```

To have slurmd pull configs via configless SLURM:

```yaml
slurm::configless: true
```

### Role: client

If the majority of your configuration is done in `common.yaml`, then the default value of `true` for `slurm::client` is sufficient to configure a host to act as a SLURM client.
Expand Down
6 changes: 5 additions & 1 deletion manifests/common/config.pp
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@
}
}

if $slurm::client and ($slurm::cli_filter_lua_source or $slurm::cli_filter_lua_content) {
if ($slurm::client or $slurm::slurmctld) and ($slurm::cli_filter_lua_source or $slurm::cli_filter_lua_content) {
file { "${slurm::conf_dir}/cli_filter.lua":
ensure => 'file',
owner => 'root',
Expand All @@ -151,6 +151,10 @@
source => $slurm::cli_filter_lua_source,
content => $slurm::cli_filter_lua_content,
}

if $slurm::slurmctld and $slurm::enable_configless {
File["${slurm::conf_dir}/cli_filter.lua"] ~> Exec['scontrol reconfig']
}
}

if ('auth/jwt' in $slurm::auth_alt_types) and ($slurm::slurmctld or $slurm::slurmdbd) {
Expand Down
57 changes: 35 additions & 22 deletions manifests/init.pp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
# @param slurmdbd_restart_on_failure
# @param reload_services
# @param restart_services
# @param slurmctld_conn_validator_timeout
# @param manage_slurm_user
# @param slurm_user_group
# @param slurm_group_gid
Expand Down Expand Up @@ -63,8 +64,6 @@
# @param cli_filter_lua_content
# @param state_dir_nfs_device
# @param state_dir_nfs_options
# @param job_checkpoint_dir_nfs_device
# @param job_checkpoint_dir_nfs_options
# @param job_submit_lua_source
# @param job_submit_lua_content
# @param cluster_name
Expand All @@ -74,6 +73,7 @@
# @param log_dir
# @param env_dir
# @param spank_plugins
# @param enable_configless
# @param configless
# @param conf_server
# @param slurm_conf_override
Expand All @@ -95,7 +95,6 @@
# @param job_containers
# @param slurmd_log_file
# @param slurmd_spool_dir
# @param job_checkpoint_dir
# @param slurmctld_log_file
# @param state_save_location
# @param slurmdbd_archive_dir
Expand Down Expand Up @@ -143,6 +142,7 @@
# @param cgroup_conf_source
# @param cgroup_automount
# @param cgroup_mountpoint
# @param cgroup_plugin
# @param cgroup_allowed_kmem_space
# @param cgroup_allowed_ram_space
# @param cgroup_allowed_swap_space
Expand All @@ -157,7 +157,6 @@
# @param cgroup_memory_swappiness
# @param cgroup_min_kmem_space
# @param cgroup_min_ram_space
# @param cgroup_task_affinity
# @param slurm_sh_template
# @param slurm_csh_template
# @param profile_d_env_vars
Expand Down Expand Up @@ -197,7 +196,7 @@
Boolean $install_pam = true,

# Source install
String $version = '20.11.8',
String $version = '21.08.4',
Array $source_dependencies = [],
Array $configure_flags = [],
Boolean $source_install_manage_alternatives = true,
Expand All @@ -217,8 +216,9 @@
String $slurmdbd_options = '',
Boolean $slurmctld_restart_on_failure = true,
Boolean $slurmdbd_restart_on_failure = true,
Boolean $reload_services = true,
Boolean $restart_services = false,
Boolean $reload_services = false,
Boolean $restart_services = true,
Integer $slurmctld_conn_validator_timeout = 60,

# User and group management
$manage_slurm_user = true,
Expand Down Expand Up @@ -260,8 +260,6 @@
# Config - controller
$state_dir_nfs_device = undef,
$state_dir_nfs_options = 'rw,sync,noexec,nolock,auto',
$job_checkpoint_dir_nfs_device = undef,
$job_checkpoint_dir_nfs_options = 'rw,sync,noexec,nolock,auto',
$job_submit_lua_source = undef,
$job_submit_lua_content = undef,

Expand All @@ -276,6 +274,7 @@
Stdlib::Absolutepath $env_dir = '/etc/sysconfig',

# configless
Boolean $enable_configless = false,
Boolean $configless = false,
Optional[String] $conf_server = undef,

Expand Down Expand Up @@ -303,7 +302,6 @@
$slurmd_spool_dir = '/var/spool/slurmd',

# slurm.conf - controller
$job_checkpoint_dir = '/var/spool/slurmctld.checkpoint',
Optional[Stdlib::Absolutepath] $slurmctld_log_file = undef,
$state_save_location = '/var/spool/slurmctld.state',

Expand Down Expand Up @@ -348,7 +346,7 @@
Array $auth_alt_types = [],
Optional[String] $jwt_key_content = undef,
Optional[String] $jwt_key_source = undef,
String $slurmrestd_listen_address = '0.0.0.0',
String $slurmrestd_listen_address = $facts['networking']['ip'],
Boolean $slurmrestd_disable_token_creation = false,
String $slurmrestd_user = 'nobody',
String $slurmrestd_user_group = 'nobody',
Expand All @@ -363,6 +361,7 @@
Optional[String] $cgroup_conf_source = undef,
Boolean $cgroup_automount = true,
Stdlib::Absolutepath $cgroup_mountpoint = '/sys/fs/cgroup',
Optional[String] $cgroup_plugin = undef,
Optional[Integer] $cgroup_allowed_kmem_space = undef,
Integer $cgroup_allowed_ram_space = 100,
Integer $cgroup_allowed_swap_space = 0,
Expand All @@ -377,7 +376,6 @@
Optional[Integer[0,100]] $cgroup_memory_swappiness = undef,
Integer $cgroup_min_kmem_space = 30,
Integer $cgroup_min_ram_space = 30,
Boolean $cgroup_task_affinity = false,

# profile.d
$slurm_sh_template = 'slurm/profile.d/slurm.sh.erb',
Expand Down Expand Up @@ -472,18 +470,31 @@
$auth_alt_parameters_dbd = undef
}

# Compute $slurmctld_parameters, the effective SlurmctldParameters value.
# When $enable_configless is true, 'enable_configless' is guaranteed to be part
# of the value while anything supplied via $slurm_conf_override is preserved.
if $enable_configless {
# An override exists but does not yet contain 'enable_configless': append it.
if 'SlurmctldParameters' in $slurm_conf_override and !('enable_configless' in $slurm_conf_override['SlurmctldParameters']) {
# Array overrides get an extra element; any other value is interpolated into
# a comma-separated string.
if $slurm_conf_override['SlurmctldParameters'] =~ Array {
$slurmctld_parameters = $slurm_conf_override['SlurmctldParameters'] + ['enable_configless']
} else {
$slurmctld_parameters = "${slurm_conf_override['SlurmctldParameters']},enable_configless"
}
# An override exists and already contains 'enable_configless': use it as-is.
} elsif 'SlurmctldParameters' in $slurm_conf_override {
$slurmctld_parameters = $slurm_conf_override['SlurmctldParameters']
# No override at all: 'enable_configless' is the only parameter.
} else {
$slurmctld_parameters = 'enable_configless'
}
} else {
# Configless not enabled: pass the override through unchanged (the lookup
# is not guarded by a key check here, unlike the branches above).
$slurmctld_parameters = $slurm_conf_override['SlurmctldParameters']
}

$slurm_conf_local_defaults = {
'AccountingStorageHost' => $slurmdbd_host,
'AccountingStoragePort' => $slurmdbd_port,
'AuthAltTypes' => $auth_alt_types,
'AuthAltParameters' => $auth_alt_parameters,
'ClusterName' => $cluster_name,
'DefaultStorageHost' => $slurmdbd_host,
'DefaultStoragePort' => $slurmdbd_port,
'Epilog' => $epilog,
'EpilogSlurmctld' => undef, #TODO
'HealthCheckProgram' => $_health_check_program,
'JobCheckpointDir' => $job_checkpoint_dir,
# Must remain undefined to support configless; we save to the same directory as slurm.conf
'PlugStackConfig' => undef,
'Prolog' => $prolog,
Expand All @@ -498,6 +509,7 @@
'SlurmdPort' => $slurmd_port,
'SlurmdSpoolDir' => $slurmd_spool_dir,
'SlurmSchedLogFile' => "${log_dir}/slurmsched.log",
'SlurmctldParameters' => $slurmctld_parameters,
'SlurmdUser' => $slurmd_user,
'SrunEpilog' => undef, #TODO
'SrunProlog' => undef, #TODO
Expand All @@ -506,8 +518,9 @@
'TaskProlog' => $task_prolog,
}

$_slurm_conf_override = $slurm_conf_override - ['SlurmctldParameters']
$slurm_conf_defaults = merge($::slurm::params::slurm_conf_defaults, $slurm_conf_local_defaults)
$slurm_conf = merge($slurm_conf_defaults, $slurm_conf_override)
$slurm_conf = merge($slurm_conf_defaults, $_slurm_conf_override)

$slurmdbd_conf_local_defaults = {
'ArchiveDir' => $slurmdbd_archive_dir,
Expand Down Expand Up @@ -569,18 +582,18 @@
$state_dir_systemd = undef
}

if $job_checkpoint_dir_nfs_device {
$checkpoint_dir_systemd = "RequiresMountsFor=${slurm::job_checkpoint_dir}"
} else {
$checkpoint_dir_systemd = undef
}

if $slurmdbd_archive_dir_nfs_device {
$slurmdbd_archive_dir_systemd = "RequiresMountsFor=${slurm::slurmdbd_archive_dir}"
} else {
$slurmdbd_archive_dir_systemd = undef
}

if $use_syslog {
$logging_systemd_override = 'present'
} else {
$logging_systemd_override = 'absent'
}

if $database {
contain slurm::slurmdbd::db
}
Expand Down
9 changes: 6 additions & 3 deletions manifests/node.pp
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@
# @param cpu_bind
# @param cpus
# @param cpu_spec_list
# @param feature
# @param features
# @param gres
# @param mem_spec_limit
# @param port
# @param real_memory
# @param reason
# @param sockets
# @param sockets_per_board
# @param state
Expand All @@ -37,11 +38,12 @@
$cpu_bind = undef,
$cpus = undef,
$cpu_spec_list = undef,
$feature = undef,
$features = undef,
$gres = undef,
$mem_spec_limit = undef,
$port = undef,
$real_memory = undef,
$reason = undef,
$sockets = undef,
$sockets_per_board = undef,
Slurm::NodeState $state = 'UNKNOWN',
Expand All @@ -65,11 +67,12 @@
'CpuBind' => $cpu_bind,
'CPUs' => $cpus,
'CpuSpecList' => $cpu_spec_list,
'Feature' => $feature,
'Features' => $features,
'Gres' => $gres,
'MemSpecLimit' => $mem_spec_limit,
'Port' => $port,
'RealMemory' => $real_memory,
'Reason' => $reason,
'Sockets' => $sockets,
'SocketsPerBoard' => $sockets_per_board,
'State' => $state,
Expand Down
7 changes: 5 additions & 2 deletions manifests/params.pp
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@
$slurm_conf_defaults = {
'AccountingStorageBackupHost' => undef,
'AccountingStorageEnforce' => undef,
'AccountingStorageExternalHost' => undef,
'AccountingStorageParameters' => undef,
'AccountingStoragePass' => undef,
'AccountingStoragePort' => undef,
'AccountingStorageTRES' => undef,
'AccountingStorageType' => 'accounting_storage/slurmdbd',
'AccountingStorageUser' => undef,
'AccountingStoreJobComment' => 'YES',
'AccountingStoreFlags' => undef,
'AcctGatherNodeFreq' => '0',
'AcctGatherEnergyType' => undef,
'AcctGatherInterconnectType' => undef,
Expand All @@ -26,6 +27,8 @@
'AuthInfo' => undef,
'AuthType' => 'auth/munge',
'BatchStartTimeout' => '10',
'BcastExclude' => undef,
'BcastParameters' => undef,
'BurstBufferType' => undef,
'CliFilterPlugins' => undef,
'CommunicationParameters' => undef,
Expand Down Expand Up @@ -132,7 +135,6 @@
'ResvOverRun' => '0',
'ReturnToService' => '0',
'RoutePlugin' => 'route/default',
'SbcastParameters' => undef,
'SchedulerParameters' => undef,
'SchedulerTimeSlice' => '30',
'SchedulerType' => 'sched/backfill',
Expand Down Expand Up @@ -197,6 +199,7 @@
'DebugFlags' => undef,
'DebugLevel' => 'info',
'DebugLevelSyslog' => 'info',
'DefaultQOS' => undef,
'LogTimeFormat' => 'iso8601_ms',
'MaxQueryTimeRange' => 'INFINITE',
'MessageTimeout' => '10',
Expand Down
Loading