-
Notifications
You must be signed in to change notification settings - Fork 441
/
deepspeed_gpt3_large.sh
40 lines (36 loc) · 1.05 KB
/
deepspeed_gpt3_large.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/bin/bash
#
# Launch GPT-3 "large" pretraining (24 layers, hidden size 1536) through
# mpirun with DeepSpeed enabled.
#
# Usage: deepspeed_gpt3_large.sh [extra pretrain_gpt3.py arguments...]
#
# Extra arguments are forwarded verbatim to ../pretrain_gpt3.py and are placed
# on its command line before the built-in options defined below.
# The exit status of this script is the exit status of mpirun.
#
# NOTE(review): the training script is referenced as ../pretrain_gpt3.py while
# the DeepSpeed config is referenced as src/deepspeed_config/...; both are
# resolved against the current working directory — confirm the directory this
# script is meant to be launched from.

# Model parallel size
MP_SIZE=1

# Process count handed to mpirun via --np. Change for multinode config.
NUM_GPUS_PER_WORKER=1

# Built-in options for pretrain_gpt3.py. Kept as an array (one element per
# word) so the values reach the program exactly as written, without relying
# on word splitting of a flat string.
gpt_options=(
  --train-data-path /path/to/train.list
  --test-data-path /path/to/test.list
  --logging-dir=log/
  --save model
  --save-interval 1000
  --model-parallel-size "${MP_SIZE}"
  --num-layers 24
  --hidden-size 1536
  --num-attention-heads 16
  --batch-size 1
  --seq-length 2048
  --max-position-embeddings 2048
  --train-iters 200000
  --resume-dataloader
  --distributed-backend nccl
  --lr 0.00015
  --lr-decay-style cosine
  --weight-decay 1e-2
  --warmup .01
  --log-interval 100
  --fp16
  --checkpoint-activations
  --deepspeed-activation-checkpointing
  --deepspeed
  --deepspeed_config src/deepspeed_config/gpt3_large_2048.json
)

#######################################
# Print the training command, then run it under mpirun.
# Globals:   NUM_GPUS_PER_WORKER (read), gpt_options (read)
# Arguments: extra arguments, forwarded verbatim to pretrain_gpt3.py
# Outputs:   the shell-quoted command line on stdout
# Returns:   the exit status of mpirun
#######################################
launch_training() {
  local -a cmd=(
    mpirun --np "${NUM_GPUS_PER_WORKER}"
    python ../pretrain_gpt3.py "$@" "${gpt_options[@]}"
  )

  # Log a copy-pasteable form of exactly what is about to be executed.
  printf 'USE_DEEPSPEED=1'
  printf ' %q' "${cmd[@]}"
  printf '\n'

  # USE_DEEPSPEED=1 is placed only in the environment of the launched command.
  # Executing the array directly (rather than eval on a flattened string)
  # keeps caller-supplied arguments intact: spaces, quotes and shell
  # metacharacters are passed through as data and never re-parsed as code.
  USE_DEEPSPEED=1 "${cmd[@]}"
}

# Keep this as the final command: a script exits with the status of its last
# command, so anything added after this line would mask a failed training run.
launch_training "$@"