-
Notifications
You must be signed in to change notification settings - Fork 8
/
ds_multi_blockta_large.sh
executable file
·48 lines (46 loc) · 1.45 KB
/
ds_multi_blockta_large.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/bin/bash
#
# Assembles the command-line options for the "blocklm-roberta-large-multi"
# experiment into the string variable `gpt_options`.
#
# As far as this file shows, it only defines variables and launches nothing;
# presumably a launcher script sources it and consumes `gpt_options`
# (TODO confirm against the caller).
#
# Inputs:
#   MP_SIZE  - not defined here, so it must be provided by the caller or the
#              environment; it is substituted into --model-parallel-size. If it
#              is unset the flag is emitted without a value.
# Outputs (variables set):
#   script_path  - resolved path of this file
#   script_dir   - directory containing this file
#   config_json  - DeepSpeed config expected next to this file
#   gpt_options  - whitespace-separated option string

# Quote every expansion so that a checkout path containing spaces (or glob
# characters) is not word-split before realpath/dirname see it.
script_path=$(realpath "${BASH_SOURCE[0]}")
script_dir=$(dirname "${script_path}")
config_json="${script_dir}/config_blockta_multi_large.json"

# Options up to and including mixed-precision; each line is joined with a
# backslash continuation so the value is a single logical line.
gpt_options=" \
  --block-lm \
  --task-mask \
  --bert-prob 0.5 \
  --gap-sentence-prob 0.3 \
  --avg-block-length 3 \
  --gpt-min-ratio 0.25 \
  --block-mask-prob 0.1 \
  --short-seq-prob 0.02 \
  --experiment-name blocklm-roberta-large-multi \
  --model-parallel-size ${MP_SIZE} \
  --num-layers 24 \
  --hidden-size 1024 \
  --num-attention-heads 16 \
  --seq-length 512 \
  --max-sequence-length 1025 \
  --save /dataset/fd5061f6/english_data/checkpoints \
  --log-interval 50 \
  --eval-interval 1000 \
  --save-interval 5000 \
  --train-iters 500000 \
  --train-data multilingual \
  --dataset-temperature 0.3 \
  --loader-scatter 32 \
  --loader-fraction 0.1 \
  --resume-dataloader \
  --no-pre-tokenize \
  --tokenizer-type ChineseSPTokenizer \
  --tokenizer-model-type /dataset/fd5061f6/duzx16/tokenizer/mglm-unigram-250k/mglm250k-uni.model \
  --split 949,50,1 \
  --distributed-backend nccl \
  --lr-decay-style linear \
  --lr-decay-ratio 0.1 \
  --lr-decay-iters 400000 \
  --warmup 0.02 \
  --checkpoint-activations \
  --fp16 \
"

# Append the DeepSpeed flags. The continuation backslash after the expansion
# keeps a literal newline out of the value, so the string stays safe to embed
# in a command that is later passed through eval or `bash -c`.
gpt_options="${gpt_options} \
  --deepspeed \
  --deepspeed_config ${config_json} \
"