forked from soheeyang/deepest-debug-challenge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.py
154 lines (119 loc) · 5.72 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
'''
WARNING: This code is full of bugs. Can you squash them all?
We've created a super-awesome sentiment classification tool
that recognizes whether a movie review is good or bad.
However, it does not work as expected... Why?
'''
import sys
import random
import csv
import numpy as np
import torch
import torch.optim as optim
import torch.utils.data
from tqdm import tqdm
from prepro import prepro_filename
import data_utils
from model import *
# Automatically pick the compute device once at import time:
# use the first CUDA GPU when one is available, otherwise fall back to CPU.
# Tensors and the model are moved onto it later via .to(device).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# we use Naver Sentiment Movie Corpus v1.0
# from Lucy Park's [nsmc](https://github.com/e9t/nsmc)
# which is a dataset for binary sentiment classification of movie reviews.
class NsmcDataset(torch.utils.data.Dataset):
    """One split of the preprocessed Naver Sentiment Movie Corpus.

    Loads the arrays ``x_<dtype>`` / ``y_<dtype>`` from the preprocessed
    .npz file, where ``dtype`` is either ``'train'`` or ``'test'``.
    """

    def __init__(self, dtype):
        assert dtype in ['train', 'test']
        # data/prepro.npz produced by prepro.py
        with np.load(prepro_filename) as npz:
            self.x = npz[f'x_{dtype}']
            self.y = npz[f'y_{dtype}']

    def __getitem__(self, ind):
        # (review, label) pair at position ind
        return self.x[ind], self.y[ind]

    def __len__(self):
        # inputs and labels must stay aligned
        assert len(self.x) == len(self.y)
        return len(self.x)
# it is not necessary to change the number of epochs to make the code work.
# only one epoch is enough to see if the model works.
# it is not necessary to change the number of epochs to make the code work.
# only one epoch is enough to see if the model works.
def main(epochs=1):
    """Train the CNN sentiment classifier on NSMC and report test loss/accuracy.

    Args:
        epochs: number of full passes over the training set (default 1).
    """
    # The training loader must shuffle: the preprocessed file may be ordered
    # by label, and SGD assumes roughly i.i.d. mini-batches.
    # (BUG FIX: was shuffle=False.)
    train_loader = torch.utils.data.DataLoader(
        NsmcDataset('train'),
        batch_size=64,
        shuffle=True)
    # Evaluation order does not matter, so the test loader stays unshuffled.
    test_loader = torch.utils.data.DataLoader(
        NsmcDataset('test'),
        batch_size=64,
        shuffle=False)

    ##########################################################################################
    ############################## Neural Network Definition & Training ##############################
    # define the model
    # .to(device) automatically loads the model to the pre-defined device (GPU or CPU)
    neural_net = CnnClassifier(len(data_utils.vocabs)).to(device)

    # Adam with its conventional learning rate. lr=1 is several orders of
    # magnitude too large and makes training diverge. (BUG FIX: was lr=1.)
    optimizer = optim.Adam(neural_net.parameters(), lr=1e-3)

    print('\n' + 'training phase')
    neural_net.train()
    for epoch in range(epochs):
        for batch_ind, (input_data, target_data) in enumerate(train_loader):
            # Gradients accumulate across backward() calls by default,
            # so they must be zeroed at every step.
            neural_net.zero_grad()
            input_data, target_data = input_data.to(device), target_data.to(device)
            output = neural_net(input_data)

            # Cross entropy against a one-hot encoding of the binary targets.
            # scatter_ requires an int64 index tensor, hence .long().
            target_data_onehot = torch.zeros(target_data.size(0), 2).to(device)
            target_data_onehot.scatter_(1, target_data.long().unsqueeze(1), 1)
            # Sum over the class dimension (dim=1), then average over the batch.
            # (BUG FIX: was dim=0, which sums over the batch instead and scales
            # the loss with the batch size.)
            # clamp_min guards against log(0) = -inf for zero probabilities.
            # NOTE(review): this assumes the model emits a softmax probability
            # distribution over the 2 classes — confirm in model.py.
            loss = -torch.mean(torch.sum(
                target_data_onehot * torch.log(output.clamp_min(1e-12)), dim=1))

            # backpropagate and update the parameters
            loss.backward()
            optimizer.step()

            # print the train log at every step
            if batch_ind % 1 == 0:
                train_log = 'Epoch {:2d}/{:2d}\tLoss: {:.6f}\tTrain: [{}/{} ({:.0f}%)]'.format(
                    epoch, epochs, loss.cpu().item(), batch_ind, len(train_loader),
                    100. * batch_ind / len(train_loader))
                print(train_log, end='\r')
                sys.stdout.flush()

    ##########################################################################################
    ############################## Evaluation of the Trained Neural Network ##############################
    print('\n' + 'evaluation phase')
    neural_net.eval()
    correct = 0.
    test_loss = 0.
    # no_grad() skips autograd bookkeeping: faster and memory-efficient
    # during evaluation.
    with torch.no_grad():
        with tqdm(total=len(test_loader)) as pbar:
            for batch_ind, (input_data, target_data) in enumerate(test_loader):
                # same device handling as the training phase
                input_data, target_data = input_data.to(device), target_data.to(device)
                output = neural_net(input_data)
                # index of the max probability = predicted class
                pred = output.argmax(dim=-1)
                correct += pred.eq(target_data.view_as(pred)).cpu().sum()
                # Accumulate the SUMMED (not averaged) cross entropy so that the
                # final division by the dataset size yields a per-sample mean.
                target_data_onehot = torch.zeros(target_data.size(0), 2).to(device)
                target_data_onehot.scatter_(1, target_data.long().unsqueeze(1), 1)
                test_loss += -torch.sum(
                    target_data_onehot * torch.log(output.clamp_min(1e-12)))
                pbar.update(1)

    # average the accumulated results over the number of test samples
    print('test loss:', float(test_loss) / len(test_loader.dataset))
    print('test accuracy:', 100. * int(correct) / len(test_loader.dataset))
if __name__ == '__main__':
    # Fix every RNG seed involved for reproducible runs.
    seed = 42
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    # `device` is a torch.device object, so compare its .type attribute.
    # (BUG FIX: `device == 'cuda'` compared a torch.device to a str, which is
    # not reliably True, so the CUDA RNG was never seeded.)
    if device.type == 'cuda':
        torch.cuda.manual_seed(seed)
    main(epochs=1)