train.py
import sys

sys.path.append("..")  # Make the project root importable before loading the local modules below.

import torch

from loss.smooth_cross_entropy import smooth_crossentropy
from utility.log import Log
from utility.neighbourhood_scheduler import ExponentialDecayNeighbourhoodSchedule, StepDecayNeighbourhoodSchedule, \
    StepIncreaseNeighbourhoodSchedule, ConstantNeighbourhoodSchedule
from utility.step_lr import StepLR
from utility.bypass_bn import enable_running_stats, disable_running_stats
from optimiser.sam import SAM
from data.cifar import CIFAR
import models.resnet as resnet
from utility.utils import parse_args

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def setup(source_dataset, noise, batch_size, label_type, threads, optimiser_choice, learning_rate,
momentum, weight_decay, initial_rho, adaptive, rho_schedule_type, k, epochs):
""" Sets up the training process. """
dataset = CIFAR(source_dataset, batch_size, label_type, noise, threads)
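    # Model: ResNet-32 for CIFAR-10; a ResNet of the same depth with 100 output classes for CIFAR-100.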
model = resnet.resnet32().to(device) if source_dataset == "cifar10" \
else resnet.ResNet(resnet.BasicBlock, [5, 5, 5], 100).to(device)
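    # SAM wraps a base SGD optimiser; otherwise plain SGD is used with the same hyperparameters.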
if optimiser_choice == "SAM":
base_optimiser = torch.optim.SGD
optimiser = SAM(model.parameters(),
base_optimiser,
rho=initial_rho,
adaptive=adaptive,
lr=learning_rate,
momentum=momentum,
weight_decay=weight_decay)
else:
optimiser = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)
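    # Name the output log after the experiment configuration; '.' in floating-point values is written as 'p'.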
experiment_filename = f"./output/{source_dataset}_{label_type}_{str(noise).replace('.', 'p')}_" \
f"{optimiser_choice}_{adaptive}_{str(initial_rho).replace('.', 'p')}"
f = open(f"{experiment_filename}.txt", "a+")
log = Log(log_each=10, file_writer=f)
# Schedulers.
scheduler = StepLR(optimiser, learning_rate, epochs) # Learning rate scheduler.
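    # Neighbourhood-radius (rho) schedule; a constant schedule is the fallback.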
if rho_schedule_type == "exponential":
rho_schedule = ExponentialDecayNeighbourhoodSchedule(initial_rho, epochs, k, optimiser)
elif rho_schedule_type == "stepdecay":
rho_schedule = StepDecayNeighbourhoodSchedule(initial_rho, epochs, optimiser)
elif rho_schedule_type == "stepincrease":
rho_schedule = StepIncreaseNeighbourhoodSchedule(initial_rho, epochs, optimiser)
else:
rho_schedule = ConstantNeighbourhoodSchedule(initial_rho, optimiser)
return dataset, model, optimiser, log, scheduler, rho_schedule


def train_sgd(dataset, model, optimiser, log, scheduler, nb_scheduler, epochs, label_smoothing, bootstrapped):
    """Trains the model with plain SGD; optionally bootstraps targets from a preliminary run."""
bootstrapped_targets, all_predictions = [], []
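    # When bootstrapping, first run a standard (non-bootstrapped) pass and reuse its final
    # training-set predictions as the targets for this run.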
if bootstrapped:
print(f"Bootstrapping an SGD run.\n")
bootstrapped_targets = train_sgd(dataset=dataset,
model=model,
optimiser=optimiser,
log=log,
scheduler=scheduler,
nb_scheduler=nb_scheduler,
epochs=epochs,
label_smoothing=label_smoothing,
bootstrapped=False)
for epoch in range(epochs):
model.train()
log.train(len_dataset=len(dataset.train))
# Iterate over training set.
for ind, batch in enumerate(dataset.train):
inputs, targets = (b.to(device) for b in batch)
targets = bootstrapped_targets[ind].to(device) if bootstrapped else targets
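            # Forward pass, loss, backward pass, and a single SGD update for this batch.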
enable_running_stats(model)
predictions = model(inputs)
loss = smooth_crossentropy(predictions, targets, label_smoothing)
loss.mean().backward()
optimiser.step()
optimiser.zero_grad()
# Calculate the accuracy, log the results, and apply the learning
# rate scheduler and the neighbourhood radius scheduler.
with torch.no_grad():
correct = torch.argmax(predictions.data, 1) == targets
log(model, loss.cpu(), correct.cpu(), scheduler.lr())
scheduler(epoch)
nb_scheduler(epoch)
#####################
# TRAINING COMPLETE #
#####################
# Save predictions for bootstrapping.
with torch.no_grad():
if not bootstrapped:
for batch in dataset.train:
inputs, _ = (b.to(device) for b in batch)
                # Argmax is needed because the original targets are class labels, whereas the
                # model outputs per-class scores over the 10 or 100 classes.
all_predictions.append(torch.argmax(model(inputs), 1))
# Set the model to eval mode.
model.eval()
log.eval(len_dataset=len(dataset.test))
# Begin testing the model on the test set.
with torch.no_grad():
for batch in dataset.test:
inputs, targets = (b.to(device) for b in batch)
predictions = model(inputs)
loss = smooth_crossentropy(predictions, targets)
correct = torch.argmax(predictions, 1) == targets
log(model, loss.cpu(), correct.cpu())
# Write final statistics.
log.flush()
if bootstrapped:
return None
return all_predictions


def train_sam(dataset, model, optimiser, log, scheduler, nb_scheduler, epochs, label_smoothing, bootstrapped):
    """Trains the model with sharpness-aware minimisation (SAM); optionally bootstraps targets from a preliminary run."""
bootstrapped_targets, all_predictions = [], []
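    # When bootstrapping, first run a standard (non-bootstrapped) pass and reuse its final
    # training-set predictions as the targets for this run.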
if bootstrapped:
log.writeline(f"Bootstrapping a SAM run.")
bootstrapped_targets = train_sam(dataset=dataset,
model=model,
optimiser=optimiser,
log=log,
scheduler=scheduler,
nb_scheduler=nb_scheduler,
epochs=epochs,
label_smoothing=label_smoothing,
bootstrapped=False)
for epoch in range(epochs):
# Set the model to training mode.
model.train()
log.train(len_dataset=len(dataset.train))
# Iterate over the batches in the training set.
for ind, batch in enumerate(dataset.train):
# Inputs is shape (batch_size, 3, 32, 32). Targets is shape (batch_size,).
inputs, targets = (b.to(device) for b in batch)
targets = bootstrapped_targets[ind].to(device) if bootstrapped else targets
# First forward-backward step: finds the adversarial point.
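            # BatchNorm running statistics are updated on this first pass only; disable_running_stats
            # below keeps the second (perturbed-weight) pass from altering them.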
enable_running_stats(model)
predictions = model(inputs)
loss = smooth_crossentropy(predictions, targets, smoothing=label_smoothing)
loss.mean().backward()
optimiser.first_step(zero_grad=True)
# Second forward-backward step: using gradient value at the adversarial point,
# update the weights at the current point.
disable_running_stats(model)
smooth_crossentropy(model(inputs), targets, smoothing=label_smoothing).mean().backward()
optimiser.second_step(zero_grad=True)
# Calculate the accuracy, log the results, and apply the learning
# rate scheduler and the neighbourhood radius scheduler.
with torch.no_grad():
correct = torch.argmax(predictions.data, 1) == targets
log(model, loss.cpu(), correct.cpu(), scheduler.lr())
scheduler(epoch)
nb_scheduler(epoch)
#####################
# TRAINING COMPLETE #
#####################
# Save predictions for bootstrapping.
with torch.no_grad():
if not bootstrapped:
for batch in dataset.train:
inputs, _ = (b.to(device) for b in batch)
                # Argmax is needed because the original targets are class labels, whereas the
                # model outputs per-class scores over the 10 or 100 classes.
all_predictions.append(torch.argmax(model(inputs), 1))
# Set the model to eval mode.
model.eval()
log.eval(len_dataset=len(dataset.test))
# Begin testing the model on the test set.
with torch.no_grad():
for batch in dataset.test:
inputs, targets = (b.to(device) for b in batch)
predictions = model(inputs)
loss = smooth_crossentropy(predictions, targets)
correct = torch.argmax(predictions, 1) == targets
log(model, loss.cpu(), correct.cpu())
    # Write the final statistics to the log.
log.flush()
if bootstrapped:
return None
return all_predictions


def train_swa():
    """Placeholder for an SWA (stochastic weight averaging) training routine; not yet implemented."""
    pass


if __name__ == '__main__':
    args = parse_args()
    training_params = setup(args.dataset, args.noise, args.batch_size, args.label_type, args.threads,  # Dataset arguments.
                            args.optimiser_choice, args.learning_rate, args.momentum, args.weight_decay,  # Optimiser.
                            args.initial_rho, args.adaptive, args.rho_scheduler, args.k,  # SAM-specific arguments.
                            args.epochs)  # Training arguments.

    # Run the training loop.
    if args.optimiser_choice == "SAM":
        train_sam(*training_params, args.epochs, args.label_smoothing, args.bootstrapped)
    else:
        train_sgd(*training_params, args.epochs, args.label_smoothing, args.bootstrapped)