-
Notifications
You must be signed in to change notification settings - Fork 1
/
Makefile
151 lines (128 loc) · 4.86 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
SHELL := /bin/bash
.PHONY: clean pretrain eval score \
score_gnn_pretrain score_lm_pretrain_causal score_lm_pretrain_masked \
score_clip_graph_causal score_clip_graph_masked
clean:
find . -name '__pycache__' -not -path '*/\.git/*' -exec rm -rf {} \+
find . -name '*.pyc' -not -path '*/\.git/*' -exec rm -f {} \+
find . -name '*.pyo' -not -path '*/\.git/*' -exec rm -f {} \+
find . -name '*.egg-info' -not -path '*/\.git/*' -exec rm -rf {} \+
find . -name '*~' -not -path '*/\.git/*' -exec rm -f {} \+
find . -name tags -not -path '*/\.git/*' -exec rm -f {} \+
find . -name tags.lock -not -path '*/\.git/*' -exec rm -f {} \+
pretrain:
@ set -e; \
for c in configs/lm-pretrain/*/*.yaml; do \
bin/trainer.py fit -c "$$c"; \
done
@ set -e; \
for c in configs/gnn-pretrain*/*/*.yaml; do \
bin/trainer.py fit -c "$$c"; \
done
@ set -e; \
for c in configs/clip-graph{,-directed}/inductive-causal/*/*.yaml; do \
bin/trainer.py fit -c "$$c"; \
done
@ set -e; \
for c in configs/clip-graph{,-directed}/inductive-masked/*/*.yaml; do \
bin/trainer.py fit -c "$$c"; \
done
eval:
bin/eval.py batch -p -r -s test -d cpu --out-dir data/evals/ -f configs/comparisons.yaml
score: score_gnn_pretrain score_lm_pretrain_causal score_lm_pretrain_masked \
score_clip_graph_causal score_clip_graph_masked
check_defined = \
$(strip $(foreach 1, $1, $(call __check_defined,$1,$(strip $(value 2)))))
__check_defined = \
$(if $(value $1), , $(error Undefined $1$(if $2, ($2))))
SCORE_MSG = Must specify the split to score with environment variable \
SPLIT, acceptable values train, test, val; e.g. \
SPLIT=val make score
# whether causal or masked for the text component of the eval dataset
# in the gnn-pretrain case doesn't matter, we don't use text at all
score_gnn_pretrain:
$(call check_defined, SPLIT, ${SCORE_MSG})
@ set -e; \
for p in lightning_logs/gnn-pretrain/*; do \
for v in "$$p"/*; do \
echo "Scoring $$v..." && \
bin/score.py gnn_pretrain -i "$$v" -o data/embeds/ \
-c "configs/eval-datasets/$$(basename "$$p")/causal.yaml" \
-d cuda -s "$(SPLIT)"; \
done \
done
@ set -e; \
for p in lightning_logs/gnn-pretrain-directed/*; do \
for v in "$$p"/*; do \
echo "Scoring $$v..." && \
bin/score.py gnn_pretrain -i "$$v" -o data/embeds/ \
-c "configs/eval-datasets/$$(basename "$$p")/causal-directed.yaml" \
-d cuda -s "$(SPLIT)"; \
done \
done
# for bin/score.py pretrain_lm, we need to specify the pooling mode and
# normalization behavior. they aren't used in the text pretraining task, but
# are needed to produce these sentence embeddings and are specified in
# clip_graph. for comparability, they should be the same as used in the
# clip-graph models. see the -p and-n options to bin/score.py -- the defaults
# used here without those options are to use mean-pooling and normalization.
score_lm_pretrain_causal:
$(call check_defined, SPLIT, ${SCORE_MSG})
@ set -e; \
for p in lightning_logs/lm-pretrain/*; do \
for v in "$$p"/causal/*; do \
echo "Scoring $$v..." && \
bin/score.py lm_pretrain -i "$$v" -o data/embeds/ \
-c "configs/eval-datasets/$$(basename "$$p")/causal.yaml" \
-p -d cuda -s "$(SPLIT)"; \
done \
done
score_lm_pretrain_masked:
$(call check_defined, SPLIT, ${SCORE_MSG})
@ set -e; \
for p in lightning_logs/lm-pretrain/*; do \
for v in "$$p"/masked/*; do \
echo "Scoring $$v..." && \
bin/score.py lm_pretrain -i "$$v" -o data/embeds/ \
-c "configs/eval-datasets/$$(basename "$$p")/masked.yaml" \
-p -d cuda -s "$(SPLIT)"; \
done \
done
score_clip_graph_causal:
$(call check_defined, SPLIT, ${SCORE_MSG})
@ set -e; \
for p in lightning_logs/clip-graph/inductive-causal/*; do \
for v in "$$p"/*; do \
echo "Scoring $$v..." && \
bin/score.py clip_graph -i "$$v" -o data/embeds/ \
-c "configs/eval-datasets/$$(basename "$$p")/causal.yaml" \
-p -d cuda -s "$(SPLIT)"; \
done \
done
for p in lightning_logs/clip-graph-directed/inductive-causal/*; do \
for v in "$$p"/*; do \
echo "Scoring $$v..." && \
bin/score.py clip_graph -i "$$v" -o data/embeds/ \
-c "configs/eval-datasets/$$(basename "$$p")/causal-directed.yaml" \
-p -d cuda -s "$(SPLIT)"; \
done \
done
score_clip_graph_masked:
$(call check_defined, SPLIT, ${SCORE_MSG})
@ set -e; \
for p in lightning_logs/clip-graph/inductive-masked/*; do \
for v in "$$p"/*; do \
echo "Scoring $$v..." && \
bin/score.py clip_graph -i "$$v" -o data/embeds/ \
-c "configs/eval-datasets/$$(basename "$$p")/masked.yaml" \
-p -d cuda -s "$(SPLIT)"; \
done \
done
for p in lightning_logs/clip-graph-directed/inductive-masked/*; do \
for v in "$$p"/*; do \
echo "Scoring $$v..." && \
bin/score.py clip_graph -i "$$v" -o data/embeds/ \
-c "configs/eval-datasets/$$(basename "$$p")/masked-directed.yaml" \
-p -d cuda -s "$(SPLIT)"; \
done \
done