example.cpp
#include "tape.h" // Tape, Var, Grad
#include <iostream>
#include <random> // random_device, mt19937, normal_distribution
#include <vector>
using namespace std;
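// Reverse-mode autodiff API assumed below (inferred from how this example uses tape.h):
//   Var x = t.var(0.5f);    // wrap a float so the Tape records operations applied to it
//   Var y = sigmoid(x * x); // arithmetic on Vars pushes partial derivatives onto the Tape
//   Grad g;
//   y.grad(g);              // backward pass: accumulate dy/d(v) for every recorded Var v into g
//   float d = g.wrt(x);     // read the partial derivative of y with respect to x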
int main(int argc, char const *argv[])
{
    // Tape saves partial derivatives from operations on Var objects
    Tape t;
    // seed pseudo-rng that initializes data and weights
    // random_device rd;
    // int seed = rd();
    int seed = -1925795689;
    mt19937 gen(seed);
    // dataset size and dimensionality (const so the arrays below have a fixed size)
    const int N = 100;
    const int dim = 2;
    // XOR dataset of N samples and 2 dimensions per sample
    float data[N][dim];
    float target[N];
    for (int i = 0; i < N; i++)
    {
        int sign = 0;
        for (int j = 0; j < dim; j++)
        {
            // randomly generate data, uniform in [-0.5, 0.5]
            data[i][j] = gen() / (float)gen.max() - 0.5f;
            if (data[i][j] > 0)
            {
                sign++;
            }
        }
        // label data with XOR: 1 when some but not all coordinates are positive
        target[i] = sign < dim && sign > 0;
    }
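    // e.g. (+0.3, -0.2) and (-0.3, +0.2) get label 1; (+0.3, +0.2) and (-0.3, -0.2) get label 0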
    // weights for a neural network of 2 inputs, 3 hidden units, and 1 output
    float weights1[2][3], weights2[3];
    // normal distribution with mean 0 and standard deviation 0.5
    normal_distribution<float> distrib(0.f, 0.5f);
    // initialize weights with the normal distribution
    for (int i = 0; i < 3; i++)
    {
        weights2[i] = distrib(gen);
        for (int j = 0; j < 2; j++)
        {
            weights1[j][i] = distrib(gen);
        }
    }
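    // random, non-identical initial weights break the symmetry between the three hidden units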
    // number of passes over the dataset
    int iterations = 100;
    // initial learning rate
    float learn_rate = 0.01f;
    for (int it = 0; it < iterations; it++)
    {
        // Grad object that will store the loss gradient
        Grad error;
        float squared_loss = 0;
        // stochastic gradient descent: loop over the dataset, updating the weights after every sample
        for (int sample = 0; sample < N; sample++)
        {
            // arrays of Var objects
            vector< vector<Var> > w1;
            vector<Var> w2;
            w1.resize(2);
            // wrap the weights in Var objects, so the Tape can calculate derivatives
            for (int i = 0; i < 3; i++)
            {
                w2.push_back(t.var(weights2[i]));
                for (int j = 0; j < 2; j++)
                {
                    w1[j].push_back(t.var(weights1[j][i]));
                }
            }
            // hidden layer of 3 sigmoid units
            vector<Var> hidden;
            // each hidden unit is the sigmoid of the dot product of the 1x2 input with a column of the 2x3 weights (w1)
            for (int i = 0; i < 3; i++)
            {
                hidden.push_back(sigmoid(w1[0][i] * data[sample][0] + w1[1][i] * data[sample][1]));
            }
            // output unit: sigmoid of the dot product between the 1x3 hidden layer and the 3x1 weights (w2)
            Var output = sigmoid(hidden[0] * w2[0] + hidden[1] * w2[1] + hidden[2] * w2[2]);
            // squared loss; operator^ on Var is presumably overloaded as exponentiation, so this squares the error
            Var loss = (output - target[sample])^2;
            squared_loss += loss.val;
            // save the loss gradient
            loss.grad(error);
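            // error now holds d(loss)/d(w) for every weight Var created on the Tape this sample; read it back with error.wrt(...)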
            // gradient descent update: w -= learn_rate * d(loss)/dw
            for (int i = 0; i < 3; i++)
            {
                weights2[i] -= error.wrt(w2[i]) * learn_rate;
                for (int j = 0; j < 2; j++)
                {
                    weights1[j][i] -= error.wrt(w1[j][i]) * learn_rate;
                }
            }
            // clear the Tape; discards the previous graph to avoid allocating extra memory
            t.clear();
        }
cout << "iteration " << it << "/100 \tmean_squared_loss = " << squared_loss/100 << '\n';
// decay learning rate
learn_rate *= 0.99;
}
cout << "seed was " << seed << endl;
return 0;
}