Skip to content

Commit

Permalink
Merge pull request #180 from jedwards4b/refactor/config_batch
Browse files Browse the repository at this point in the history
split config_batch.xml per machine
  • Loading branch information
jedwards4b authored Aug 8, 2024
2 parents 4a53875 + 096a3d6 commit dd04adf
Show file tree
Hide file tree
Showing 34 changed files with 486 additions and 500 deletions.
10 changes: 10 additions & 0 deletions machines/aleph/config_batch.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!-- aleph: PBS batch settings; the only queue defined is iccp, which is also the default -->
<batch_system type="pbs" MACH="aleph">
  <directives>
    <directive>-l nodes={{ num_nodes }}</directive>
    <directive>-q iccp</directive>
    <directive> -V </directive>
  </directives>
  <queues>
    <queue default="true" walltimemax="24:00:00">iccp</queue>
  </queues>
</batch_system>
20 changes: 20 additions & 0 deletions machines/athena/config_batch.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<!-- athena: LSF batch settings -->
<batch_system type="lsf" MACH="athena">
  <!-- JOB_QUEUE, JOB_WALLCLOCK_TIME and PROJECT are passed at submit time via -q, -W and -P -->
  <submit_args>
    <argument> -q $JOB_QUEUE </argument>
    <argument> -W $JOB_WALLCLOCK_TIME </argument>
    <argument> -P $PROJECT </argument>
  </submit_args>
  <!-- The -a directive substitutes the poe template variable and declares default="poe" -->
  <directives>
    <directive> -R "span[ptile={{ tasks_per_node }}]"</directive>
    <directive> -N </directive>
    <directive default="poe"> -a {{ poe }} </directive>
  </directives>
  <!-- Three queues with back-to-back walltime windows (0-2, 2-4, 4-8); poe_medium is the default -->
  <queues>
    <queue walltimemin="00:00" walltimemax="02:00">poe_short</queue>
    <queue default="true" walltimemin="02:00" walltimemax="04:00">poe_medium</queue>
    <queue walltimemin="04:00" walltimemax="08:00">poe_long</queue>
  </queues>
</batch_system>


12 changes: 12 additions & 0 deletions machines/aws-hpc6a/config_batch.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!-- aws-hpc6a: Slurm batch settings; jobs are submitted with sbatch -->
<batch_system type="slurm" MACH="aws-hpc6a">
  <batch_submit>sbatch</batch_submit>
  <!-- JOB_WALLCLOCK_TIME and JOB_QUEUE are passed at submit time via the time and -p options -->
  <submit_args>
    <argument> --time $JOB_WALLCLOCK_TIME </argument>
    <argument> -p $JOB_QUEUE </argument>
  </submit_args>
  <!-- serial is the default queue; regular and build are limited by node count,
       serial by jobmin/jobmax -->
  <queues>
    <queue nodemin="1" nodemax="96" walltimemax="144:00:00">regular</queue>
    <queue nodemin="1" nodemax="1" walltimemax="1:00:00">build</queue>
    <queue default="true" jobmin="1" jobmax="1" walltimemax="4:00:00">serial</queue>
  </queues>
</batch_system>
21 changes: 21 additions & 0 deletions machines/casper/config_batch.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<!-- casper: PBS batch settings; jobs are submitted with qsub -->
<batch_system type="pbs" MACH="casper">
  <batch_submit>qsub</batch_submit>
  <!-- GPU_TYPE is passed at submit time as the gpu_type resource -->
  <submit_args>
    <argument> -l gpu_type=$GPU_TYPE </argument>
  </submit_args>
  <!-- casper queue, nvhpc compiler, GPU enabled: select line requests mem=700GB and sets mps=1 -->
  <directives queue="casper" gpu_enabled="true" compiler="nvhpc">
    <directive default="/bin/bash"> -S {{ shell }} </directive>
    <directive> -l select={{ num_nodes }}:ncpus={{ max_tasks_per_node }}:mpiprocs={{ tasks_per_node }}:ompthreads={{ thread_count }}:mem=700GB:ngpus={{ ngpus_per_node }}:mps=1 </directive>
  </directives>
  <!-- casper queue, GPU disabled: select line requests mem=300GB and has no mps setting -->
  <directives queue="casper" gpu_enabled="false">
    <directive default="/bin/bash"> -S {{ shell }} </directive>
    <directive> -l select={{ num_nodes }}:ncpus={{ max_tasks_per_node }}:mpiprocs={{ tasks_per_node }}:ompthreads={{ thread_count }}:mem=300GB:ngpus={{ ngpus_per_node }} </directive>
  </directives>
  <!-- Unknown queues use the batch directives for the casper queue -->
  <unknown_queue_directives>casper</unknown_queue_directives>
  <queues>
    <queue nodemin="1" nodemax="10" walltimemax="12:00:00">casper</queue>
  </queues>
</batch_system>

24 changes: 24 additions & 0 deletions machines/coeus/config_batch.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<!-- coeus: Slurm batch settings -->
<batch_system type="slurm" MACH="coeus">
  <!-- Commands used to query, submit and cancel jobs, plus the job-script directive prefix -->
  <batch_query per_job_arg="-j">squeue</batch_query>
  <batch_submit>sbatch</batch_submit>
  <batch_cancel>scancel</batch_cancel>
  <batch_directive>#SBATCH</batch_directive>
  <!-- Job id pattern, dependency separator and walltime format -->
  <jobid_pattern>(\d+)$</jobid_pattern>
  <depend_separator>,</depend_separator>
  <walltime_format>%H:%M:%S</walltime_format>
  <!-- Mail notification flags and the accepted mail types -->
  <batch_mail_flag>--mail-user</batch_mail_flag>
  <batch_mail_type_flag>--mail-type</batch_mail_type_flag>
  <batch_mail_type>none, all, begin, end, fail</batch_mail_type>
  <!-- Job name and output are both set from job_id; node and per-node task counts
       come from num_nodes and tasks_per_node -->
  <directives>
    <directive> --job-name={{ job_id }}</directive>
    <directive> --nodes={{ num_nodes }}</directive>
    <directive> --ntasks-per-node={{ tasks_per_node }}</directive>
    <directive> --output={{ job_id }} </directive>
    <directive> --exclusive </directive>
  </directives>
  <!-- Single default queue, medium, spanning 1 to 96 nodes -->
  <queues>
    <queue default="true" nodemin="1" nodemax="96">medium</queue>
  </queues>
</batch_system>

Loading

0 comments on commit dd04adf

Please sign in to comment.