# Import the core PyTorch and torchvision modules
import torch
import torchvision
from torchvision import transforms, datasets
# It's really important to separate out our train and test datasets early
# (set download=True on the first run if the MNIST data is not already on disk)
train = datasets.MNIST("", train=True, download=False,
                       transform=transforms.Compose([transforms.ToTensor()]))
test = datasets.MNIST("", train=False, download=False,
                      transform=transforms.Compose([transforms.ToTensor()]))
# We shuffle the training data because the name of the game is generalisation:
# we want to avoid overfitting (memorisation of the dataset).
# The whole dataset (~150 MB) would fit through the model in one go, but we feed it in mini-batches of 10 samples
trainset = torch.utils.data.DataLoader(train, batch_size=10, shuffle=True)
testset = torch.utils.data.DataLoader(test, batch_size=10, shuffle=False)
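# Quick sanity check (illustrative addition, not part of the original script): pull one
# batch from the DataLoader and confirm its shape - 10 images of 1x28x28 plus 10 labels.
X_batch, y_batch = next(iter(trainset))
print(X_batch.shape, y_batch.shape)  # torch.Size([10, 1, 28, 28]) torch.Size([10])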
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        # Fully connected layers: each takes the previous layer's output size as its input size
        # layer 1: flattened 28x28 image in, 64 features out
        self.fc1 = nn.Linear(28*28, 64)
        # layer 2
        self.fc2 = nn.Linear(64, 64)
        # layer 3
        self.fc3 = nn.Linear(64, 64)
        # layer 4: 64 from the previous layer, 10 outputs (one per digit class)
        self.fc4 = nn.Linear(64, 10)

    def forward(self, x):
        # ReLU runs on the *output* of each hidden layer
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # no activation on the final layer; log_softmax turns its output into log-probabilities
        x = self.fc4(x)
        return F.log_softmax(x, dim=1)
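# Note (added for clarity): the F.log_softmax outputs pair with the F.nll_loss used below;
# together they are equivalent to applying nn.CrossEntropyLoss to the raw fc4 logits.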
net = Net()
print(net)
# Sanity check: push a random 28x28 "image" through the untrained network
X = torch.rand((28, 28))
X = X.view(-1, 28*28)  # flatten to shape (1, 784); -1 lets torch infer the batch dimension
output = net(X)
print(output)
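# Illustrative addition: the predicted class is simply the index of the largest
# log-probability in each row of the output.
print(torch.argmax(output, dim=1))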
import torch.optim as optim
optimizer = optim.Adam(net.parameters(), lr = 0.001)
# Iterate over our data; an epoch is one whole pass over the training set
EPOCHS = 3
for epoch in range(EPOCHS):
    for data in trainset:
        # data is one batch of featuresets and labels
        X, y = data
        # if you don't zero the gradients, they just keep getting added together across batches
        net.zero_grad()
        output = net(X.view(-1, 28*28))
        # labels here are scalar class indices, so we use negative log-likelihood loss;
        # with one-hot vector targets we would use something like mean squared error instead
        loss = F.nll_loss(output, y)
        loss.backward()
        optimizer.step()
    # loss on the last batch of this epoch
    print(loss)
correct = 0
total = 0
# Now we test on the held-out test set. Since we aren't training, we don't need to calculate gradients
with torch.no_grad():
    for data in testset:
        X, y = data
        output = net(X.view(-1, 28*28))
        for idx, i in enumerate(output):
            # count a hit when the highest-scoring class matches the label
            if torch.argmax(i) == y[idx]:
                correct += 1
            total += 1
print("Accuracy: ", round(correct/total, 3))
import matplotlib.pyplot as plt
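# The matplotlib import above is presumably here for visualising results. A minimal
# sketch under that assumption (reuses `testset` and `net` defined earlier): display
# the first image of a test batch together with the class the network predicts for it.
X, y = next(iter(testset))
plt.imshow(X[0].view(28, 28), cmap="gray")
plt.title(f"predicted: {torch.argmax(net(X[0].view(-1, 28*28))[0]).item()}")
plt.show()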