forked from google-research/bert
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pretrain.sh
executable file
·78 lines (71 loc) · 2.86 KB
/
pretrain.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env bash
# pretrain.sh — distillation pretraining pipeline:
#   1) distill BERT-base (teacher) into a 4-layer student,
#   2) eval the unmodified teacher as a baseline,
#   3) sanity-check the teacher through the distillation code path,
#   4) evaluate the student checkpoint on MNLI.
# Requires: gcloud/gsutil auth for the bucket, a TPU named "pretrain".
set -euo pipefail

# GCS bucket holding pretraining data, checkpoints, and run outputs.
export BUCKET=gs://koursaros
# Local directories with the teacher (12-layer) and student (4-layer) configs.
export TEACHER_DIR=../uncased_L-12_H-768_A-12
export STUDENT_DIR=../uncased_L-4_H-768_A-12
# Uncomment to wipe a previous run's output before retraining.
#gsutil rm -r "${BUCKET}/pretraining_output"
# Distillation pretraining: train the student against the teacher's
# predictions (--distill --pred_distill) on the TPU "pretrain".
# NOTE: --input_file is quoted so the shell passes the '*' pattern through
# literally; run_pretraining.py expands it itself against GCS (a local shell
# glob could never match a gs:// path and, with nullglob set, would erase it).
python3 run_pretraining.py \
  --input_file="${BUCKET}/bert_pretrain_data/*.tf_record" \
  --output_dir="${BUCKET}/pretraining_output" \
  --do_train=True \
  --do_eval=True \
  --bert_config_file="${STUDENT_DIR}/bert_config.json" \
  --teacher_config_file="${TEACHER_DIR}/bert_config.json" \
  --teacher_checkpoint="${BUCKET}/uncased_L-12_H-768_A-12/bert_model.ckpt" \
  --train_batch_size=128 \
  --max_seq_length=128 \
  --max_predictions_per_seq=20 \
  --num_train_steps=10000 \
  --num_warmup_steps=1000 \
  --learning_rate=2e-5 \
  --distill \
  --pred_distill \
  --use_tpu \
  --tpu_name=pretrain
# BERT BASE BASELINE EVAL — reference numbers from a prior run:
#***** Eval results *****
#INFO:tensorflow: global_step = 0
#INFO:tensorflow: loss = 2.284875
#INFO:tensorflow: masked_lm_accuracy = 0.6143619
#INFO:tensorflow: masked_lm_loss = 2.2041852
#INFO:tensorflow: next_sentence_accuracy = 0.98125
#INFO:tensorflow: next_sentence_loss = 0.07830824
# Eval-only pass over the unmodified teacher checkpoint to establish the
# baseline the distilled student is compared against.
python3 run_pretraining.py \
  --input_file="${BUCKET}/bert_pretrain_data/*.tf_record" \
  --output_dir="${BUCKET}/baseline_pretrain" \
  --do_eval=True \
  --bert_config_file="${TEACHER_DIR}/bert_config.json" \
  --train_batch_size=128 \
  --max_seq_length=128 \
  --use_tpu \
  --tpu_name=pretrain \
  --init_checkpoint="${BUCKET}/uncased_L-12_H-768_A-12/bert_model.ckpt"
# SANITY CHECK: evaluate the teacher through the distillation code path
# (--eval_teacher). The masked-LM / next-sentence metrics should match the
# baseline above (total loss differs because it includes distillation terms):
# ***** Eval results *****
#INFO:tensorflow: global_step = 0
#INFO:tensorflow: loss = 53.61523
#INFO:tensorflow: masked_lm_accuracy = 0.6143619
#INFO:tensorflow: masked_lm_loss = 2.2041852
#INFO:tensorflow: next_sentence_accuracy = 0.98125
#INFO:tensorflow: next_sentence_loss = 0.07830824
python3 run_pretraining.py \
  --input_file="${BUCKET}/bert_pretrain_data/*.tf_record" \
  --output_dir="${BUCKET}/tmp" \
  --do_train=False \
  --do_eval=True \
  --bert_config_file="${STUDENT_DIR}/bert_config.json" \
  --teacher_config_file="${TEACHER_DIR}/bert_config.json" \
  --teacher_checkpoint="${BUCKET}/uncased_L-12_H-768_A-12/bert_model.ckpt" \
  --train_batch_size=128 \
  --max_seq_length=128 \
  --max_predictions_per_seq=20 \
  --num_train_steps=100000 \
  --num_warmup_steps=1000 \
  --learning_rate=2e-5 \
  --distill \
  --pred_distill \
  --use_tpu \
  --tpu_name=pretrain \
  --eval_teacher
# MNLI evaluation of the distilled student checkpoint.
# Vocab comes from the teacher dir (student shares the teacher's vocab);
# config comes from the student dir; checkpoint lives in the bucket.
GLUE_DIR=../glue_data
MODEL=preds_distill/model.ckpt-50000
python3 run_classifier.py \
  --task_name=MNLI \
  --do_train=false \
  --do_eval=true \
  --data_dir="${GLUE_DIR}/MNLI" \
  --vocab_file="${TEACHER_DIR}/vocab.txt" \
  --bert_config_file="${STUDENT_DIR}/bert_config.json" \
  --init_checkpoint="${BUCKET}/${MODEL}" \
  --max_seq_length=128 \
  --train_batch_size=32 \
  --learning_rate=2e-5 \
  --num_train_epochs=3.0 \
  --output_dir="${BUCKET}/tmp/mnli_output/" \
  --use_tpu \
  --tpu_name=pretrain