forked from karpathy/llama2.c
-
Notifications
You must be signed in to change notification settings - Fork 0
/
eval_reverse_mem.sh
executable file
·70 lines (62 loc) · 2 KB
/
eval_reverse_mem.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/bin/bash
# Evaluate a resumed checkpoint on the "tinystories_reverse" task at several
# context lengths (max_seq_len = 2^8 .. 2^12).
#
# Each length is one `python3 train.py --eval_only=True --init_from="resume"`
# run against ${out_dir}; stdout of every run is appended to a single log file
# inside ${out_dir}, and a copy of this script is stored there as eval.sh.
#
# Usage: run from the directory that contains train.py.

# data
task_name="tinystories_reverse"
vocab_source="custom" # llama2|custom; use Llama 2 vocab from Meta, or custom trained
vocab_size=4096       # the Llama 2 tokenizer has 32K tokens

# eval
batch_size=32 # passed to train.py; reduced in the loop below once i >= 14
eval_iters=1000
eval_last=False
repeat_tokens=False

# model
attention_type="memory_attention"
extend_method="" # only used as a component of the log file name
key_norm=False
memseqlen=32
do_wm=False
do_memory_ffn=True
memory_norm=True
reuse_kv=True
train_orimem=False

# memory save or use
use_saved_mem=""
update_memory=False
save_memory=""
# Setting save_memory to a non-empty value forces update_memory on.
if [[ -n "${save_memory}" ]]; then
  update_memory=True
fi

# I/O
# Alternative run directory (previously assigned and then immediately overwritten):
# out_dir=./out/etry_reverse_custom4096_len256_memory32_ffn_norm_reusekv_trainmem
out_dir=./out/infinity_repeat_custom4096_len256_memory32_ffn_norm_reusekv
mkdir -p -- "${out_dir}"
# Keep a snapshot of the exact settings used next to the results.
cp -- "$0" "${out_dir}/eval.sh"

# The log name depends only on settings fixed above, so build it once.
log_file="${out_dir}/log_${use_saved_mem}_update${update_memory}_${extend_method}_${key_norm}.txt"

for ((i = 8; i <= 12; i++)); do
  # The two reductions below are dormant with the current upper bound of 12;
  # they only take effect if the loop is extended to i >= 14 / i >= 17.
  # Once applied they stay in effect for all later (longer) iterations.
  if ((i >= 14)); then
    batch_size=8
  fi
  if ((i >= 17)); then
    batch_size=1
    eval_iters=100
  fi

  max_seq_len=$((2 ** i))
  # max_seq_len=$((32 * i))
  echo "eval $max_seq_len"
  date

  # One array element per argument: a missing space before a line-continuation
  # backslash can no longer glue two options into a single word (the original
  # produced "--update_memory=...--out_dir=..." as one argument).
  train_args=(
    "--task_name=${task_name}"
    "--batch_size=${batch_size}"
    "--max_seq_len=${max_seq_len}"
    "--key_norm=${key_norm}"
    "--vocab_source=${vocab_source}"
    "--vocab_size=${vocab_size}"
    "--attention_type=${attention_type}"
    "--memseqlen=${memseqlen}"
    "--memory_norm=${memory_norm}"
    "--do_memory_ffn=${do_memory_ffn}"
    "--do_wm=${do_wm}"
    "--reuse_kv=${reuse_kv}"
    "--train_orimem=${train_orimem}"
    "--dtype=float32"
    "--device=cuda"
    "--compile=False"
    "--eval_only=True"
    "--init_from=resume"
    "--always_save_checkpoint=False"
    "--eval_last=${eval_last}"
    "--eval_iters=${eval_iters}"
    "--repeat_tokens=${repeat_tokens}"
    "--save_memory=${save_memory}"
    "--use_saved_mem=${use_saved_mem}"
    "--update_memory=${update_memory}"
    "--out_dir=${out_dir}"
  )

  # NOTE: the pipeline's exit status is tee's, so a failing train.py run does
  # not stop the sweep; check the log for errors.
  python3 train.py "${train_args[@]}" \
    | tee -a "${log_file}"
done