From d21e90a375a7b21f73f44c5267461f2538f81f21 Mon Sep 17 00:00:00 2001
From: Zhijian Liu <5782437+zhijian-liu@users.noreply.github.com>
Date: Mon, 29 Jul 2024 23:39:37 -0400
Subject: [PATCH] Support W&B resume (#145)

---
 scripts/setups/train.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/setups/train.sh b/scripts/setups/train.sh
index 912aa0bf..ca96562b 100644
--- a/scripts/setups/train.sh
+++ b/scripts/setups/train.sh
@@ -17,8 +17,10 @@ OUTPUT_DIR=${OUTPUT_DIR:-"runs/$RUN_NAME"}
 echo "OUTPUT_DIR = $OUTPUT_DIR"
 
 export WANDB_PROJECT="vila"
-export WANDB_NAME=$RUN_NAME
 export WANDB_DIR=$OUTPUT_DIR
+export WANDB_RUN_ID=$RUN_NAME
+export WANDB_NAME=$RUN_NAME
+export WANDB_RESUME="allow"
 
 NNODES=${SLURM_JOB_NUM_NODES:-1}
 echo "NNODES = $NNODES"