-
Notifications
You must be signed in to change notification settings - Fork 37
/
Copy pathrun_seq2seq_verbose.bash
executable file
·150 lines (140 loc) · 3.26 KB
/
run_seq2seq_verbose.bash
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env bash
# -*- coding:utf-8 -*-
#
# Default configuration for one run. The values are assigned first and then
# exported together, so they are visible to child processes as well as to the
# rest of this script.

# Run identifier: date, hour, minute and a $RANDOM suffix.
EXP_ID="$(date "+%F-%H-%M-${RANDOM}")"

# Device selection.
CUDA_VISIBLE_DEVICES='0'

# Model, data set and task.
model_name='t5-base'
data_name='one_ie_ace2005_subtype'
task_name='event'
decoding_format='tree'

# Optimisation settings.
batch_size='16'
lr='5e-5'
lr_scheduler='constant_with_warmup'
label_smoothing='0'
warmup_steps='0'
epoch='30'
eval_steps='500'
seed='421'

# Holds the literal flag that is passed through to the Python entry point.
constraint_decoding='--constraint_decoding'

export CUDA_VISIBLE_DEVICES batch_size model_name data_name lr task_name seed \
  lr_scheduler label_smoothing epoch decoding_format eval_steps warmup_steps \
  constraint_decoding
#######################################
# Parse the command-line options and overwrite the matching configuration
# variables. Parsing runs inside a function, so the script's own positional
# parameters are left untouched.
# Globals (written):
#   batch_size, CUDA_VISIBLE_DEVICES, model_name, data_name, task_name, seed,
#   lr, decoding_format, lr_scheduler, label_smoothing, epoch, eval_steps,
#   warmup_steps, constraint_decoding
# Arguments:
#   The command-line arguments to parse.
# Outputs:
#   Diagnostics on stderr.
# Returns:
#   0 on success. Exits the script with status 1 when getopt rejects the
#   arguments or when an option has no matching case arm.
#######################################
parse_options() {
  local opts
  # Test getopt's status directly instead of inspecting $? afterwards.
  if ! opts=$(getopt -o b:d:m:i:t:s:l:f: \
    --long batch:,device:,model:,data:,task:,seed:,lr:,lr_scheduler:,label_smoothing:,epoch:,format:,eval_steps:,warmup_steps:,wo_constraint_decoding \
    -n 'parse-options' -- "$@"); then
    echo "Failed parsing options." >&2
    exit 1
  fi

  # getopt prints the normalized argument list as shell-quoted words; eval is
  # what turns that text back into positional parameters.
  eval set -- "${opts}"

  while true; do
    case "$1" in
      -b | --batch)
        batch_size="$2"
        shift 2
        ;;
      -d | --device)
        CUDA_VISIBLE_DEVICES="$2"
        shift 2
        ;;
      -m | --model)
        model_name="$2"
        shift 2
        ;;
      -i | --data)
        data_name="$2"
        shift 2
        ;;
      -t | --task)
        task_name="$2"
        shift 2
        ;;
      -s | --seed)
        seed="$2"
        shift 2
        ;;
      -l | --lr)
        lr="$2"
        shift 2
        ;;
      -f | --format)
        decoding_format="$2"
        shift 2
        ;;
      --lr_scheduler)
        lr_scheduler="$2"
        shift 2
        ;;
      --label_smoothing)
        label_smoothing="$2"
        shift 2
        ;;
      --epoch)
        epoch="$2"
        shift 2
        ;;
      --eval_steps)
        eval_steps="$2"
        shift 2
        ;;
      --warmup_steps)
        warmup_steps="$2"
        shift 2
        ;;
      --wo_constraint_decoding)
        # Clearing the variable removes the flag from the Python command line.
        constraint_decoding=""
        shift
        ;;
      --)
        # End-of-options marker: nothing left to parse.
        shift
        break
        ;;
      *)
        # Only reachable when the getopt spec lists an option that has no arm
        # above. Report on stderr and fail: a bare `exit` here would return
        # echo's status (0) and make the script look successful.
        echo "$1 not recognize." >&2
        exit 1
        ;;
    esac
  done
}

parse_options "$@"
# Derive the directories used by this run from the selected configuration.

# Model identifiers may contain slashes (google/mt5-base -> google_mt5-base);
# replace them so the identifier does not add directory levels to model_folder.
# Parameter expansion is used instead of `echo | sed`: it spawns no processes,
# does not word-split or glob the value, and does not depend on sed's `-s`
# flag, which is a GNU extension with no purpose when reading stdin.
model_name_log="${model_name//\//_}"

# Output directory name encodes the run id and the main hyper-parameters.
model_folder="models/CF_${EXP_ID}_${model_name_log}_${decoding_format}_${data_name}_${lr_scheduler}_lr${lr}_ls${label_smoothing}_${batch_size}_wu${warmup_steps}"

# Input data directory, selected by decoding format and data set name.
data_folder="data/text2${decoding_format}/${data_name}"

export TOKENIZERS_PARALLELISM=false

output_dir="${model_folder}"
# Launch run_seq2seq.py (resolved relative to the current directory) with
# training, evaluation and prediction enabled in a single invocation.
#
# The arguments are collected in an array and expanded quoted, so a value that
# contains whitespace or glob characters reaches Python as one argument instead
# of being split or expanded by the shell.
train_args=(--do_train --do_eval --do_predict)

# constraint_decoding holds either the flag itself or an empty string; append
# it only when non-empty so that no empty argument is passed along.
if [[ -n "${constraint_decoding}" ]]; then
  train_args+=("${constraint_decoding}")
fi

train_args+=(
  --label_smoothing_sum=False
  --use_fast_tokenizer=False
  --evaluation_strategy steps
  --predict_with_generate
  --metric_for_best_model eval_role-F1
  --save_total_limit 1
  --load_best_model_at_end
  --max_source_length=256
  --max_target_length=128
  "--num_train_epochs=${epoch}"
  "--task=${task_name}"
  "--train_file=${data_folder}/train.json"
  "--validation_file=${data_folder}/val.json"
  "--test_file=${data_folder}/test.json"
  "--event_schema=${data_folder}/event.schema"
  "--per_device_train_batch_size=${batch_size}"
  # Evaluation uses four times the training batch size.
  "--per_device_eval_batch_size=$((batch_size * 4))"
  "--output_dir=${output_dir}"
  "--logging_dir=${output_dir}_log"
  "--model_name_or_path=${model_name}"
  "--learning_rate=${lr}"
  "--lr_scheduler_type=${lr_scheduler}"
  "--label_smoothing_factor=${label_smoothing}"
  --eval_steps "${eval_steps}"
  --decoding_format "${decoding_format}"
  --warmup_steps "${warmup_steps}"
  # The source prefix is the task name followed by a colon and a space.
  "--source_prefix=${task_name}: "
  "--seed=${seed}"
)

# Kept as the final command so its exit status is the one left in $? here.
CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES}" python run_seq2seq.py "${train_args[@]}"