# train.py (155 lines, 122 loc) — training entry point.
# NOTE: GitHub page chrome and the scraped line-number gutter were removed here;
# the code below is the actual file content.
import os
import time
import math
import torch
from eval import *
import torch.nn as nn
from utils import evalIoU
from networks import get_model
from torch.autograd import Variable
from dataloader.dataset import NeoData
from torch.utils.data import DataLoader
from dataloader.transform import MyTransform
from torchvision.transforms import ToPILImage
from options.train_options import TrainOptions
from torch.optim import SGD, Adam, lr_scheduler
from criterion.criterion import CrossEntropyLoss2d
NUM_CHANNELS = 3
def get_loader(args):
    """Build the training and validation DataLoaders from the image/label
    list files under ``args.datadir``.

    Returns:
        (train_loader, val_loader)
    """
    # Class-weight notes kept from the original author (weights not applied here):
    #   weight = 1 / ln(c + P_class), e.g. weight[0]=1.45, weight[1]=54.38, weight[2]=428.723
    train_image_list = os.path.join(args.datadir, 'train/image.txt')
    train_label_list = os.path.join(args.datadir, 'train/label.txt')
    val_image_list = os.path.join(args.datadir, 'val/image.txt')
    val_label_list = os.path.join(args.datadir, 'val/label.txt')

    # Training uses augmentation (resize, crop, flip, ...); validation only resize + crop.
    transform_train = MyTransform(reshape_size=(500, 350), crop_size=(448, 320), augment=True)
    transform_val = MyTransform(reshape_size=(500, 350), crop_size=(448, 320), augment=False)

    train_set = NeoData(train_image_list, train_label_list, transform_train)
    val_set = NeoData(val_image_list, val_label_list, transform_val)

    train_loader = DataLoader(train_set, num_workers=args.num_workers,
                              batch_size=args.batch_size, shuffle=True)
    val_loader = DataLoader(val_set, num_workers=args.num_workers,
                            batch_size=args.batch_size, shuffle=False)
    return train_loader, val_loader
def train(args, model):
    """Run the full training loop and return the trained model.

    Args:
        args:  parsed TrainOptions namespace (reads num_classes, savedir, cuda,
               lr, num_epochs, epoch_save, steps_loss, iouTrain, batch_size,
               num_workers, datadir, model).
        model: network to optimize (already moved to GPU by the caller when
               args.cuda is set).

    Returns:
        The trained model.

    Side effects:
        Appends per-epoch stats to <savedir>/automated_log.txt and saves
        model weights every ``args.epoch_save`` epochs.
    """
    NUM_CLASSES = args.num_classes  # pascal=21, cityscapes=20
    savedir = args.savedir
    # Uniform class weights; see get_loader() notes for the 1/ln(c+P) weighting idea.
    weight = torch.ones(NUM_CLASSES)
    loader, loader_val = get_loader(args)

    if args.cuda:
        criterion = CrossEntropyLoss2d(weight).cuda()
    else:
        criterion = CrossEntropyLoss2d(weight)

    # Create the log file with a header row only on the first run.
    automated_log_path = savedir + "/automated_log.txt"
    if not os.path.exists(automated_log_path):
        with open(automated_log_path, "a") as myfile:
            myfile.write("Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate")

    optimizer = Adam(model.parameters(), args.lr, (0.9, 0.999), eps=1e-08, weight_decay=1e-4)
    # Polynomial decay: lr * (1 - (epoch-1)/num_epochs) ** 0.9, stepped once per epoch.
    lambda1 = lambda epoch: pow((1 - ((epoch - 1) / args.num_epochs)), 0.9)
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)

    start_epoch = 1
    for epoch in range(start_epoch, args.num_epochs + 1):
        print("----- TRAINING - EPOCH", epoch, "-----")
        scheduler.step(epoch)

        epoch_loss = []
        time_train = []

        # Confusion matrix for the training IoU (cityscapes evalIoU helpers).
        confMatrix = evalIoU.generateMatrixTrainId(evalIoU.args)
        perImageStats = {}
        nbPixels = 0

        usedLr = 0
        for param_group in optimizer.param_groups:
            print("LEARNING RATE: ", param_group['lr'])
            usedLr = float(param_group['lr'])

        model.train()
        for step, (images, labels) in enumerate(loader):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()
            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs)

            # Labels carry a singleton channel dim; drop it for the 2d loss.
            loss = criterion(outputs, targets[:, 0])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # BUGFIX: loss.data[0] raises IndexError on PyTorch >= 0.4
            # (loss is a 0-dim tensor); .item() is the supported scalar accessor.
            epoch_loss.append(loss.item())
            time_train.append(time.time() - start_time)

            # Accumulate the training confusion matrix (code remade from
            # cityscapes/scripts/evaluation/evalPixelLevelSemanticLabeling.py).
            # NOTE(review): nbPixels is an int, so add_to_confMatrix cannot
            # update it in place — confirm whether its return value is needed.
            if args.iouTrain:
                add_to_confMatrix(outputs, labels, confMatrix, perImageStats, nbPixels)

            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print('loss: {} (epoch: {}, step: {})'.format(average, epoch, step),
                      "// Avg time/img: %.4f s" % (sum(time_train) / len(time_train) / args.batch_size))

        average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)

        iouAvgStr, iouTrain, classScoreList = cal_iou(evalIoU, confMatrix)
        print("EPOCH IoU on TRAIN set: ", iouAvgStr)

        # Evaluate loss and IoU on the validation set.
        average_epoch_loss_val, iouVal = eval(args, model, loader_val, criterion, epoch)

        # Checkpoint every args.epoch_save epochs.
        if epoch % args.epoch_save == 0:
            torch.save(model.state_dict(),
                       '{}_{}.pth'.format(os.path.join(args.savedir, args.model), str(epoch)))

        # Append this epoch's stats to the log.
        with open(automated_log_path, "a") as myfile:
            myfile.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" %
                         (epoch, average_epoch_loss_train, average_epoch_loss_val, iouTrain, iouVal, usedLr))

    return model
def main(args):
    """Record the run options and model architecture, then train the model."""
    savedir = '{}'.format(args.savedir)
    modeltxtpath = os.path.join(savedir, 'model.txt')

    if not os.path.exists(savedir):
        os.makedirs(savedir)

    # Persist the command-line options for reproducibility.
    with open(savedir + '/opts.txt', "w") as opts_file:
        opts_file.write(str(args))

    # Build the network and record its printed architecture.
    model = get_model(args)
    with open(modeltxtpath, "w") as model_file:
        model_file.write(str(model))

    if args.cuda:
        model = model.cuda()

    print("========== TRAINING ===========")
    model = train(args, model)
    print("========== TRAINING FINISHED ===========")
if __name__ == '__main__':
    # Parse command-line options and launch training.
    opts = TrainOptions().parse()
    main(opts)