"""
https://towardsdatascience.com/understanding-pytorch-with-an-example-a-step-by-step-tutorial-81fc5f8c4e8e
"""
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.optim as optim
import torch.nn as nn
import time  # unused in this example
import os
from matplotlib import _pylab_helpers  # only needed by the disabled event-loop workaround below
# $ pip install torchviz
from torchviz import make_dot
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Data Generation
np.random.seed(42)
x = np.random.rand(100, 1)
y = 1 + 2 * x + .1 * np.random.randn(100, 1)
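# (true parameters: intercept a = 1, slope b = 2, Gaussian noise with std 0.1 --
#  the training loop below should recover values close to these)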
# Shuffles the indices
idx = np.arange(100)
np.random.shuffle(idx)
# Uses first 80 random indices for train
train_idx = idx[:80]
# Uses the remaining indices for validation
val_idx = idx[80:]
# Generates train and validation sets
x_train, y_train = x[train_idx], y[train_idx]
x_val, y_val = x[val_idx], y[val_idx]
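# Quick visual check of the split: validation points in red, training points in blue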
plt.plot(x_val, y_val, 'r.')
plt.plot(x_train, y_train, 'b.')
if False:
    # plt.show()
    # _pylab_helpers.Gcf.get_active().canvas.start_event_loop(0)
    plt.pause(0)
# Our data was in Numpy arrays, but we need to transform them into PyTorch's Tensors
# and then we send them to the chosen device
x_train_tensor = torch.from_numpy(x_train).float().to(device)
y_train_tensor = torch.from_numpy(y_train).float().to(device)
# Here we can see the difference - notice that .type() is more useful
# since it also tells us WHERE the tensor is (device)
print(type(x_train), type(x_train_tensor), x_train_tensor.type())
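# Expected output, roughly:
#   <class 'numpy.ndarray'> <class 'torch.Tensor'> torch.FloatTensor
# (torch.cuda.FloatTensor when device == 'cuda')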
# We can specify the device at the moment of creation - RECOMMENDED!
torch.manual_seed(42)
a = torch.randn(1, requires_grad=True, dtype=torch.float, device=device)
b = torch.randn(1, requires_grad=True, dtype=torch.float, device=device)
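if False:
    # Added sketch (not in the original script): the pitfall that creating the
    # tensor directly on the device avoids. Building it on the CPU first and
    # then sending it with .to() makes the CUDA copy a NON-LEAF tensor, so no
    # gradients would accumulate in it after backward().
    a_bad = torch.randn(1, requires_grad=True, dtype=torch.float).to(device)
    print(a_bad.is_leaf)  # False on 'cuda'; True on 'cpu', where .to() is a no-op here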
print("BEFORE: a,b")
print(a, b)
# Sets learning rate
lr = 1e-1
# Defines number of epochs
n_epochs = 1000
# Defines a MSE loss function
loss_fn = nn.MSELoss(reduction='mean')
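# With reduction='mean' this computes the same scalar as the manual
# (error ** 2).mean() kept in the disabled branch inside the loop below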
# Defines a SGD optimizer to update the parameters
optimizer = optim.SGD([a, b], lr=lr)
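# For vanilla SGD (no momentum), optimizer.step() applies p -= lr * p.grad to
# each parameter -- the same update written out by hand in the disabled
# "THIRD ATTEMPT" branch inside the loop below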
for epoch in range(n_epochs):
    # Computes our model's predicted output
    yhat = a + b * x_train_tensor
    if False:
        # How wrong is our model? That's the error!
        error = (y_train_tensor - yhat)
        # It is a regression, so it computes mean squared error (MSE)
        loss = (error ** 2).mean()
        if False:
            print("now dot...")
            print(os.getcwd())  # show where the graph images are written
            make_dot(yhat).render("torch_graph_yhat", format="png")
            make_dot(error).render("torch_graph_error", format="png")
            make_dot(loss).render("torch_graph_loss", format="png")
            break
    # Argument order here is (target, input); nn.MSELoss is symmetric, so this
    # equals the conventional loss_fn(yhat, y_train_tensor)
    loss = loss_fn(y_train_tensor, yhat)
    # No more manual computation of gradients!
    # # Computes gradients for both "a" and "b" parameters
    # a_grad = -2 * error.mean()
    # b_grad = -2 * (x_tensor * error).mean()
    # We just tell PyTorch to work its way BACKWARDS from the specified loss!
    loss.backward()
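    # backward() ACCUMULATES gradients into a.grad and b.grad on every call,
    # which is why they must be zeroed after each update (optimizer.zero_grad() below)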
    if False:
        # Let's check the computed gradients...
        print(a.grad)
        print(b.grad)
    # No more telling PyTorch to let gradients go!
    if False:
        # THIRD ATTEMPT
        # We need to use NO_GRAD to keep the update out of the gradient computation
        # Why is that? It boils down to the DYNAMIC GRAPH that PyTorch uses...
        with torch.no_grad():
            a -= lr * a.grad
            b -= lr * b.grad
        # PyTorch is "clingy" to its computed gradients, we need to tell it to let it go...
        a.grad.zero_()
        b.grad.zero_()
    optimizer.step()
    optimizer.zero_grad()
print("AFTER: a,b")
print(a, b)
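if False:
    # Optional sanity check (an added sketch, not in the original tutorial):
    # compare the learned a and b against the closed-form least-squares fit.
    # Uses only numpy, which is already imported; both solutions should be
    # close to the true values (1, 2) used in the data generation above.
    X = np.hstack([np.ones_like(x_train), x_train])   # design matrix [1, x]
    coef, *_ = np.linalg.lstsq(X, y_train, rcond=None)
    print("least-squares check:", coef.ravel())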
print("done")