-
Notifications
You must be signed in to change notification settings - Fork 62
/
srun_gpt_all.sh
63 lines (53 loc) · 1.15 KB
/
srun_gpt_all.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/bin/sh
# Launch main_gpt_all.py on a Slurm partition through srun.
#
# Usage:
#   ./srun_gpt_all.sh config_path mode partition gpunum
#
# Arguments:
#   config_path  existing configuration file, forwarded as `--config`
#   mode         one of train | eval | visgt | anl | sample; forwarded to
#                main_gpt_all.py as `--<mode>`
#   partition    Slurm partition name, forwarded as `--partition`
#   gpunum       number of GPUs to request; values above 8 are capped to 8
#
# Optional environment variables (this script never assigns them itself):
#   expname        directory to create before launching
#   config_suffix  Slurm job name; when unset, no --job-name is passed and
#                  srun falls back to its own default
#
# Exit status: 1 on any argument error, otherwise the exit status of srun.
#
# Written against POSIX sh on purpose so that it behaves the same whether
# /bin/sh is bash, dash or another POSIX shell.

# Print an error message to stderr and abort with a non-zero status.
die() {
  echo "[ERROR] $*" >&2
  exit 1
}

config_path=${1:-}
mode=${2:-}
partition_name=${3:-}
gpunum=${4:-}

echo "[Usage] $0 config_path [train|eval|visgt|anl|sample] partition gpunum"

# Nothing in this script writes to log/; the directory is still created
# because the original script did so (presumably for the Python side --
# TODO confirm). Failure here is non-fatal, as before.
mkdir -p log

# check config exists (an empty argument counts as missing)
if [ -z "$config_path" ] || [ ! -e "$config_path" ]; then
  die "configuration file: $config_path does not exists!"
fi

# expname is honoured only when supplied through the environment.
if [ -n "${expname:-}" ]; then
  mkdir -p "$expname" || die "cannot create directory: $expname"
  echo "[INFO] saving results to, or loading files from: $expname"
fi

if [ -z "$partition_name" ]; then
  die "enter partition name"
fi
echo "[INFO] partition name: $partition_name"

# Reject anything that is not a plain non-negative integer before comparing.
case $gpunum in
  '') die "enter gpu num" ;;
  *[!0-9]*) die "gpu num must be a non-negative integer: $gpunum" ;;
esac
if [ "$gpunum" -gt 8 ]; then
  gpunum=8
fi
echo "[INFO] GPU num: $gpunum"

# Fail loudly on a missing or unknown mode instead of silently doing nothing.
case $mode in
  train | eval | visgt | anl | sample) ;;
  '') die "enter mode: train|eval|visgt|anl|sample" ;;
  *) die "unknown mode: $mode (expected train|eval|visgt|anl|sample)" ;;
esac

# Build the command as an argument vector (the positional parameters) rather
# than a string, so values containing spaces survive intact.
set -- srun --mpi=pmi2 "--partition=$partition_name" "--gres=gpu:$gpunum" -n1
if [ -n "${config_suffix:-}" ]; then
  set -- "$@" "--job-name=$config_suffix"
fi
set -- "$@" python -u main_gpt_all.py --config "$config_path" "--$mode"

# Last command: its exit status becomes the script's exit status.
"$@"