From 9a24d4a45ffd8564afea2f25d6e12169c5de5d24 Mon Sep 17 00:00:00 2001 From: "Walter.Kolczynski" Date: Fri, 14 Apr 2023 19:26:17 -0500 Subject: [PATCH] Split MPMD stdout into tasks on slurm It can be difficult to debug MPMD jobs because their logs are all written concurrently to a single file. While the task tags added via the preamble and PS4 help identify which task wrote each line, it is still difficult to follow a single task through the log, particularly for larger MPMD jobs with dozens of tasks. Individual stdout files are now created by using the `srun` `--output` option. These files are written to the working directory (in `$DATA`). Fixes: #1468 --- env/HERA.env | 2 +- env/JET.env | 2 +- env/ORION.env | 2 +- env/S4.env | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/env/HERA.env b/env/HERA.env index a42e3a0170..3fa6288200 100755 --- a/env/HERA.env +++ b/env/HERA.env @@ -17,7 +17,7 @@ step=$1 export npe_node_max=40 export launcher="srun -l --export=ALL" -export mpmd_opt="--multi-prog" +export mpmd_opt="--multi-prog --output=${step}.%J.%t.out" # Configure MPI environment #export I_MPI_ADJUST_ALLREDUCE=5 diff --git a/env/JET.env b/env/JET.env index 4035e8414c..66d9ed9a3b 100755 --- a/env/JET.env +++ b/env/JET.env @@ -20,7 +20,7 @@ elif [[ "${PARTITION_BATCH}" = "vjet" ]]; then export npe_node_max=16 fi export launcher="srun -l --export=ALL" -export mpmd_opt="--multi-prog" +export mpmd_opt="--multi-prog --output=${step}.%J.%t.out" # Configure STACK export OMP_STACKSIZE=2048000 diff --git a/env/ORION.env b/env/ORION.env index 04b1344609..43aa24689d 100755 --- a/env/ORION.env +++ b/env/ORION.env @@ -17,7 +17,7 @@ step=$1 export npe_node_max=40 export launcher="srun -l --export=ALL" -export mpmd_opt="--multi-prog" +export mpmd_opt="--multi-prog --output=${step}.%J.%t.out" # Configure MPI environment export MPI_BUFS_PER_PROC=2048 diff --git a/env/S4.env b/env/S4.env index e48cde8505..4933fb989e 100755 --- a/env/S4.env 
+++ b/env/S4.env @@ -22,7 +22,7 @@ elif [[ ${PARTITION_BATCH} = "ivy" ]]; then export npe_node_max=20 fi export launcher="srun -l --export=ALL" -export mpmd_opt="--multi-prog" +export mpmd_opt="--multi-prog --output=${step}.%J.%t.out" # Configure MPI environment export OMP_STACKSIZE=2048000