% This file compares several methods of GP regression with noisy input measurements and evaluates their performance in seven scenarios:
% 1. Perfect GP regression, with exact input measurements (but noisy output measurements) and exact hyperparameters. (Only 200 measurements.)
% 2. Regular GP regression, with noisy input measurements and tuned hyperparameters. (Only 200 measurements.)
% 3. Regular FITC, using the hyperparameters of (2). (The full 800 measurements.)
% 4. The NIGP algorithm, which tunes the hyperparameters itself. (Only 200 measurements.)
% 5. The SONIG algorithm, using the hyperparameters of (4). (Only 200 measurements.)
% 6. The SONIG algorithm, using the hyperparameters of (4). (The full 800 measurements, which gives roughly the same runtime as (4).)
% 7. The SONIG algorithm, initialized with an estimate obtained by applying the NIGP algorithm to a subset of 100 measurement points. (A total of 800 measurements is used.)
% Note that this order differs from the one in the paper; it is simply easier to execute the methods in this order.
% We clear all data we have previously obtained.
clear all;
clc;
% We add the folders we need for both regression algorithms.
addpath('NIGP/');
addpath('NIGP/util/');
addpath('NIGP/tprod/');
addpath('SONIG/');
addpath('GPSupport/');
% We define the range of the plot we will make.
xMin = -5; % What is the minimum x value?
xMax = -xMin; % What is the maximum x value?
% We define numbers of points and set up the corresponding point spaces.
nm = 800; % This is the total number of available measurement points. They are used by algorithms 3, 6 and 7.
nmu = nm/4; % This is the number of measurements used by algorithms 1, 2, 4 and 5.
nmn = nm/8; % This is the number of measurements used for NIGP training in algorithm 7.
np = 101; % This is the number of plot points.
nu = 21; % The number of inducing input points.
xp = linspace(xMin,xMax,np); % These are the plot points.
xu = linspace(xMin,xMax,nu); % These are the inducing input points.
% We now set up the experiments. We first define some storage parameters and then start looping through the experiments.
numIterations = 10; % How many functions do we approximate? For the paper we used 200. Keep in mind that (at least on my system) one iteration lasts roughly 20 seconds.
numMethods = 7; % How many algorithms do we have? This is 7, unless you add algorithms yourself.
res = zeros(numMethods,3,numIterations); % For each method and iteration: [MSE, mean posterior variance, mean squared normalized error].
muuS = zeros(nu,numMethods,numIterations); % Posterior means at the inducing input points.
mupS = zeros(np,numMethods,numIterations); % Posterior means at the plot points.
SuS = zeros(nu,nu,numMethods,numIterations); % Posterior covariances at the inducing input points.
SpS = zeros(np,np,numMethods,numIterations); % Posterior covariances at the plot points.
xmS = zeros(nm,numIterations); % Noisy measurement input points.
xmrS = zeros(nm,numIterations); % Real (noise-free) measurement input points.
ymS = zeros(nm,numIterations); % Noisy measurement output values.
ymrS = zeros(nm,numIterations); % Real (noise-free) measurement output values.
ypS = zeros(np,numIterations); % True function values at the plot points.
counter = 0; % We initialize a counter.
while counter < numIterations
% We set up some preliminary stuff for the loop.
counter = counter + 1;
disp(['Starting loop ',num2str(counter),'.']);
% We define some settings for the function which we will generate. We generate it by sampling from a GP.
sn = 0.1;
sx = 0.4;
len = 1;
alpha = 1;
Lambda = len^2;
realParams = [len,sx,alpha,sn];
% We set up the input points and the corresponding covariance matrices.
xmr = xMin + rand(1,nm)*(xMax - xMin); % These are the real measurement input points.
xm = xmr + sx*randn(1,nm); % These are the input points corrupted by noise.
xmrS(:,counter) = xmr'; % We store the real input points, in case we want to inspect them later.
xmS(:,counter) = xm'; % We store the noisy input points as well.
% We calculate covariance matrices.
input = [xu,xm,xp,xmr];
diff = repmat(input,[size(input,2),1]) - repmat(input',[1,size(input,2)]);
K = alpha^2*exp(-1/2*diff.^2/Lambda);
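% Note: K is the squared exponential covariance function k(x,x') = alpha^2*exp(-(x - x')^2/(2*Lambda)), evaluated jointly over the inducing inputs xu, the noisy inputs xm, the plot points xp and the real inputs xmr. The sub-blocks below are simply slices of this joint matrix.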
Kuu = K(1:nu,1:nu);
Kmu = K(nu+1:nu+nm,1:nu);
Kpu = K(nu+nm+1:nu+nm+np,1:nu);
Kru = K(nu+nm+np+1:nu+nm+np+nm,1:nu);
Kum = K(1:nu,nu+1:nu+nm);
Kmm = K(nu+1:nu+nm,nu+1:nu+nm);
Kpm = K(nu+nm+1:nu+nm+np,nu+1:nu+nm);
Krm = K(nu+nm+np+1:nu+nm+np+nm,nu+1:nu+nm);
Kup = K(1:nu,nu+nm+1:nu+nm+np);
Kmp = K(nu+1:nu+nm,nu+nm+1:nu+nm+np);
Kpp = K(nu+nm+1:nu+nm+np,nu+nm+1:nu+nm+np);
Krp = K(nu+nm+np+1:nu+nm+np+nm,nu+nm+1:nu+nm+np);
Kur = K(1:nu,nu+nm+np+1:nu+nm+np+nm);
Kmr = K(nu+1:nu+nm,nu+nm+np+1:nu+nm+np+nm);
Kpr = K(nu+nm+1:nu+nm+np,nu+nm+np+1:nu+nm+np+nm);
Krr = K(nu+nm+np+1:nu+nm+np+nm,nu+nm+np+1:nu+nm+np+nm);
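% The block of sub-matrices above is recomputed in exactly the same way after every hyperparameter update below. A possible refactor (a sketch only, left commented out here; local functions at the end of a script require MATLAB R2016b or later) would compute the joint matrix in one call:
% function K = sqExpKernel(x, alpha, Lambda)
% % Squared exponential kernel matrix for a row vector of input points x.
% d = repmat(x,[size(x,2),1]) - repmat(x',[1,size(x,2)]);
% K = alpha^2*exp(-1/2*d.^2/Lambda);
% end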
% To generate a random sample with covariance matrix K, we first have to find the Cholesky decomposition of K. That's what we do here.
epsilon = 1e-7; % We add a tiny bit of jitter to the diagonal to prevent K from being numerically singular.
L = chol([Krr,Krp;Kpr,Kpp] + epsilon*eye(nm+np))'; % We take the Cholesky decomposition to be able to generate a sample with the right covariance matrix. (The mvnrnd function could be used too, but it fails on near-singular covariance matrices more often than this approach.)
sample = L*randn(nm+np,1);
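% Why this works: if z ~ N(0,I) and K = L*L', then L*z has covariance L*E[z*z']*L' = L*L' = K. As an optional sanity check (purely illustrative), the sample covariance of many such draws should approach the true covariance matrix:
% draws = L*randn(nm+np,10000); % 10000 independent samples.
% empiricalCov = draws*draws'/10000; % This should approximate [Krr,Krp;Kpr,Kpp].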
% We create and store the measurements.
ymr = sample(1:nm)'; % These are the real function measurements, done at the real measurement input points, without any noise.
ym = ymr + sn*randn(1,nm); % We add noise to the function measurements, to get the noisy measurements.
yp = sample(nm+1:nm+np)'; % This is the function value of the function we want to approximate at the plot points.
ymrS(:,counter) = ymr';
ymS(:,counter) = ym';
ypS(:,counter) = yp';
% Method 1.
% We first apply GP regression to the true measurement inputs (with output noise but without input noise), using the actual hyperparameters.
mupGPn = Kpr(:,1:nmu)/(Krr(1:nmu,1:nmu) + sn^2*eye(nmu))*ym(1:nmu)';
SpGPn = Kpp - Kpr(:,1:nmu)/(Krr(1:nmu,1:nmu) + sn^2*eye(nmu))*Krp(1:nmu,:);
stdGPn = sqrt(diag(SpGPn));
% We examine and store the results.
mupS(:,1,counter) = mupGPn;
SpS(:,:,1,counter) = SpGPn;
res(1,:,counter) = [mean((mupGPn - yp').^2),mean(stdGPn.^2),mean(((mupGPn - yp')./stdGPn).^2)];
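% The three stored quantities are, respectively, the mean squared error of the posterior mean, the mean posterior variance, and the mean squared normalized error. For a well-calibrated method the squared errors should match the predicted variances, so the third quantity should be close to one.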
% Method 2.
% We now set up a GP of the noisy measurements, with tuned hyperparameters. First we tune the hyperparameters.
tic;
[sn,alpha,Lambda] = tuneHyperparameters(xm(1:nmu),ym(1:nmu)');
disp(['GP hyperparameter tuning time is ',num2str(toc),' s. Parameters found were lx: ',num2str(sqrt(Lambda)),', ly: ',num2str(alpha),', sy: ',num2str(sn),'.']);
param2 = [sn,alpha,Lambda];
% We recalculate covariance matrices.
input = [xu,xm,xp,xmr];
diff = repmat(input,[size(input,2),1]) - repmat(input',[1,size(input,2)]);
K = alpha^2*exp(-1/2*diff.^2/Lambda);
Kuu = K(1:nu,1:nu);
Kmu = K(nu+1:nu+nm,1:nu);
Kpu = K(nu+nm+1:nu+nm+np,1:nu);
Kru = K(nu+nm+np+1:nu+nm+np+nm,1:nu);
Kum = K(1:nu,nu+1:nu+nm);
Kmm = K(nu+1:nu+nm,nu+1:nu+nm);
Kpm = K(nu+nm+1:nu+nm+np,nu+1:nu+nm);
Krm = K(nu+nm+np+1:nu+nm+np+nm,nu+1:nu+nm);
Kup = K(1:nu,nu+nm+1:nu+nm+np);
Kmp = K(nu+1:nu+nm,nu+nm+1:nu+nm+np);
Kpp = K(nu+nm+1:nu+nm+np,nu+nm+1:nu+nm+np);
Krp = K(nu+nm+np+1:nu+nm+np+nm,nu+nm+1:nu+nm+np);
Kur = K(1:nu,nu+nm+np+1:nu+nm+np+nm);
Kmr = K(nu+1:nu+nm,nu+nm+np+1:nu+nm+np+nm);
Kpr = K(nu+nm+1:nu+nm+np,nu+nm+np+1:nu+nm+np+nm);
Krr = K(nu+nm+np+1:nu+nm+np+nm,nu+nm+np+1:nu+nm+np+nm);
% We make a GP prediction using the noisy measurement points.
mupGPm = Kpm(:,1:nmu)/(Kmm(1:nmu,1:nmu) + sn^2*eye(nmu))*ym(1:nmu)';
SpGPm = Kpp - Kpm(:,1:nmu)/(Kmm(1:nmu,1:nmu) + sn^2*eye(nmu))*Kmp(1:nmu,:);
stdGPm = sqrt(diag(SpGPm));
% We examine and store the results.
mupS(:,2,counter) = mupGPm;
SpS(:,:,2,counter) = SpGPm;
res(2,:,counter) = [mean((mupGPm - yp').^2),mean(stdGPm.^2),mean(((mupGPm - yp')./stdGPm).^2)];
% Method 3.
% Next, we set up the FITC algorithm for the noisy measurements, using the hyperparameters which we just found. We use all nm measurements here.
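% For reference, the FITC equations implemented below are, with inv() denoting a matrix inverse:
% Lmm = diag(Kmm + sn^2*I - Kmu*inv(Kuu)*Kum), keeping only the diagonal,
% Su = Kuu*inv(Kuu + Kum*inv(Lmm)*Kmu)*Kuu,
% muu = Su*inv(Kuu)*Kum*inv(Lmm)*ym'.
% The prediction at the plot points then follows from this posterior distribution of the inducing points.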
Lmm = diag(diag(Kmm + sn^2*eye(nm) - Kmu/Kuu*Kum));
SuFITC = Kuu/(Kuu + Kum/Lmm*Kmu)*Kuu;
muuFITC = SuFITC/Kuu*Kum/Lmm*ym';
mupFITC = Kpu/Kuu*muuFITC;
SpFITC = Kpp - Kpu/Kuu*(Kuu - SuFITC)/Kuu*Kup;
stdUFITC = sqrt(diag(SpFITC));
% We examine and store the results.
muuS(:,3,counter) = muuFITC;
SuS(:,:,3,counter) = SuFITC;
mupS(:,3,counter) = mupFITC;
SpS(:,:,3,counter) = SpFITC;
res(3,:,counter) = [mean((mupFITC - yp').^2),mean(stdUFITC.^2),mean(((mupFITC - yp')./stdUFITC).^2)];
% Method 4.
% The next step is to train the NIGP algorithm. To make the search more efficient, we initialize the hyperparameters at their true values, but the algorithm won't converge
% onto these anyway. It'll find its own parameters. So no cheating here. Well, not much anyway.
seard = log(realParams([1,3,4])');
lsipn = log(realParams(2));
tic;
evalc('[model, nigp] = trainNIGP(permute(xm(:,1:nmu),[2,1]),permute(ym(:,1:nmu),[2,1]),-500,1,seard,lsipn);'); % We apply the NIGP training algorithm. We put this in an evalc function to suppress the output made by the NIGP algorithm.
% We extract the derived settings.
len = exp(model.seard(1,1));
alpha = exp(model.seard(2,1));
sn = exp(model.seard(3,1));
sx = exp(model.lsipn);
Lambda = len^2;
Sx = sx^2;
disp(['NIGP hyperparameter tuning time is ',num2str(toc),' s. Parameters found were lx: ',num2str(len),', sx: ',num2str(sx),', ly: ',num2str(alpha),', sy: ',num2str(sn),'.']);
% We recalculate covariance matrices.
input = [xu,xm,xp,xmr];
diff = repmat(input,[size(input,2),1]) - repmat(input',[1,size(input,2)]);
K = alpha^2*exp(-1/2*diff.^2/Lambda);
Kuu = K(1:nu,1:nu);
Kmu = K(nu+1:nu+nm,1:nu);
Kpu = K(nu+nm+1:nu+nm+np,1:nu);
Kru = K(nu+nm+np+1:nu+nm+np+nm,1:nu);
Kum = K(1:nu,nu+1:nu+nm);
Kmm = K(nu+1:nu+nm,nu+1:nu+nm);
Kpm = K(nu+nm+1:nu+nm+np,nu+1:nu+nm);
Krm = K(nu+nm+np+1:nu+nm+np+nm,nu+1:nu+nm);
Kup = K(1:nu,nu+nm+1:nu+nm+np);
Kmp = K(nu+1:nu+nm,nu+nm+1:nu+nm+np);
Kpp = K(nu+nm+1:nu+nm+np,nu+nm+1:nu+nm+np);
Krp = K(nu+nm+np+1:nu+nm+np+nm,nu+nm+1:nu+nm+np);
Kur = K(1:nu,nu+nm+np+1:nu+nm+np+nm);
Kmr = K(nu+1:nu+nm,nu+nm+np+1:nu+nm+np+nm);
Kpr = K(nu+nm+1:nu+nm+np,nu+nm+np+1:nu+nm+np+nm);
Krr = K(nu+nm+np+1:nu+nm+np+nm,nu+nm+np+1:nu+nm+np+nm);
% We make the NIGP prediction for the test points.
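% Note: model.dipK is NIGP's input-noise correction. Roughly speaking, it contains the extra output variance caused by the input noise, based on the local slope of the posterior mean, and it is added to the diagonal of the effective noise covariance below.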
mupNIGP = Kpm(:,1:nmu)/(Kmm(1:nmu,1:nmu) + sn^2*eye(nmu) + diag(model.dipK(1:nmu)))*ym(1:nmu)';
SpNIGP = Kpp - Kpm(:,1:nmu)/(Kmm(1:nmu,1:nmu) + sn^2*eye(nmu) + diag(model.dipK(1:nmu)))*Kmp(1:nmu,:);
stdpNIGP = sqrt(diag(SpNIGP));
% We examine and store the results.
mupS(:,4,counter) = mupNIGP;
SpS(:,:,4,counter) = SpNIGP;
res(4,:,counter) = [mean((mupNIGP - yp').^2),mean(stdpNIGP.^2),mean(((mupNIGP - yp')./stdpNIGP).^2)];
% Method 5/6/7.
% And now it's time for the SONIG algorithm, done in various ways.
for method = 5:7
% We first determine which measurements to use. We also set up a SONIG object and give it the right inducing input points.
if method == 5 || method == 6
% We set which measurement points we will use.
fromPoint = 1;
if method == 5
toPoint = nmu;
else
toPoint = nm;
end
% We set up a SONIG object with the right hyperparameters.
hyp = NIGPModelToHyperparameters(model);
sonig = createSONIG(hyp);
sonig = addInducingInputPoint(sonig, xu);
else
% We set which measurement points we will use.
fromPoint = nmn+1;
toPoint = nm;
% We apply NIGP training on the first set of measurements.
seard = log(realParams([1,3,4])');
lsipn = log(realParams(2));
tic;
evalc('[model, nigp] = trainNIGP(permute(xm(:,1:nmn),[2,1]),permute(ym(:,1:nmn),[2,1]),-500,1,seard,lsipn);'); % We apply the NIGP training algorithm. We put this in an evalc function to suppress the output made by the NIGP algorithm.
% We extract the derived settings.
len = exp(model.seard(1,1));
alpha = exp(model.seard(2,1));
sn = exp(model.seard(3,1));
sx = exp(model.lsipn);
Lambda = len^2;
Sx = sx^2;
% We recalculate covariance matrices.
input = [xu,xm,xp,xmr];
diff = repmat(input,[size(input,2),1]) - repmat(input',[1,size(input,2)]);
K = alpha^2*exp(-1/2*diff.^2/Lambda);
Kuu = K(1:nu,1:nu);
Kmu = K(nu+1:nu+nm,1:nu);
Kpu = K(nu+nm+1:nu+nm+np,1:nu);
Kru = K(nu+nm+np+1:nu+nm+np+nm,1:nu);
Kum = K(1:nu,nu+1:nu+nm);
Kmm = K(nu+1:nu+nm,nu+1:nu+nm);
Kpm = K(nu+nm+1:nu+nm+np,nu+1:nu+nm);
Krm = K(nu+nm+np+1:nu+nm+np+nm,nu+1:nu+nm);
Kup = K(1:nu,nu+nm+1:nu+nm+np);
Kmp = K(nu+1:nu+nm,nu+nm+1:nu+nm+np);
Kpp = K(nu+nm+1:nu+nm+np,nu+nm+1:nu+nm+np);
Krp = K(nu+nm+np+1:nu+nm+np+nm,nu+nm+1:nu+nm+np);
Kur = K(1:nu,nu+nm+np+1:nu+nm+np+nm);
Kmr = K(nu+1:nu+nm,nu+nm+np+1:nu+nm+np+nm);
Kpr = K(nu+nm+1:nu+nm+np,nu+nm+np+1:nu+nm+np+nm);
Krr = K(nu+nm+np+1:nu+nm+np+nm,nu+nm+np+1:nu+nm+np+nm);
% We set up a SONIG object and give it the starting distribution given by the NIGP algorithm.
hyp = NIGPModelToHyperparameters(model); % We use the hyperparameters just provided by the NIGP algorithm.
sonig = createSONIG(hyp);
sonig = addInducingInputPoint(sonig, xu);
muu = Kum(:,1:nmn)/(Kmm(1:nmn,1:nmn) + sn^2*eye(nmn) + diag(model.dipK(1:nmn)))*ym(1:nmn)'; % This is the mean of the inducing input points, predicted by NIGP after nmn measurements.
Su = Kuu - Kum(:,1:nmn)/(Kmm(1:nmn,1:nmn) + sn^2*eye(nmn) + diag(model.dipK(1:nmn)))*Kmu(1:nmn,:); % And this is the covariance matrix.
sonig.fu{1} = createDistribution(muu, Su);
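% The SONIG object now starts from the NIGP posterior over the inducing input points rather than from the prior, so the online updates below only need to process the remaining measurements.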
end
% And now we implement all the measurements into the SONIG object.
for i = fromPoint:toPoint
inputDist = createDistribution(xm(:,i), hyp.sx^2); % This is the prior distribution of the input point.
outputDist = createDistribution(ym(:,i), hyp.sy^2); % This is the prior distribution of the output point.
[sonig, inputPost, outputPost] = implementMeasurement(sonig, inputDist, outputDist); % We implement the measurement into the SONIG object.
end
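% Note that the measurements are processed one by one: every call to implementMeasurement recursively updates the posterior distribution of the inducing input points, which is what makes the algorithm online.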
[mupSONIG, SpSONIG, stdpSONIG] = makeSonigPrediction(sonig, xp); % Here we make the prediction.
% We check whether the resulting SONIG object is still valid. If not, a numerical problem has occurred and we redo this iteration of the while loop.
if sonig.valid == 0
disp(['Problems occurred. Restarting loop ',num2str(counter),'.']);
counter = counter - 1;
break; % We break out of the method loop here (a continue would only skip to the next method and store its results in the wrong slot) so that the while loop redoes this iteration from scratch.
end
% We examine and store the results.
muuS(:,method,counter) = sonig.fu{1}.mean;
SuS(:,:,method,counter) = sonig.fu{1}.cov;
mupS(:,method,counter) = mupSONIG;
SpS(:,:,method,counter) = SpSONIG;
res(method,:,counter) = [mean((mupSONIG - yp').^2),mean(stdpSONIG.^2),mean(((mupSONIG - yp')./stdpSONIG).^2)];
end
end
% Finally, we evaluate the results by averaging them over all iterations.
disp('We are done! Results are as follows for the various methods. (Note that the order is different from the order in the paper.)');
disp(' MSE Mean var. Ratio (The MSE and Mean var have been multiplied by 1000 for visibility.)');
result = mean(res(:,1:2,:),3);
disp([result*1e3,result(:,1)./result(:,2)]); % We show the results. We multiply the errors by a thousand to make the numbers more visible in Matlab.
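% To also inspect the spread over the iterations (optional and purely illustrative), one could look at the standard deviation of the MSE per method:
% disp(std(squeeze(res(:,1,:)),0,2)*1e3); % Standard deviation of the MSE over all iterations, also multiplied by 1000.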
% save('ComparisonScriptExperiments');
%% With this script, we can plot the result of a certain sample from the script above. We can also load in earlier data.
% load('ComparisonScriptExperiments');
% Which sample (or counter number) should we plot?
sample = 1;
% We extract the measurements and plot points for this case.
xm = xmS(:,sample);
xmr = xmrS(:,sample);
ym = ymS(:,sample);
ymr = ymrS(:,sample);
yp = ypS(:,sample);
% We plot the resulting function generated for that case, including the measurements that were done.
figure(11);
clf(11);
hold on;
grid on;
plot(xp, yp, 'b-');
plot(xmr(1:nmu), ymr(1:nmu), 'g+');
plot(xm(1:nmu), ym(1:nmu), 'ro');
title('Sample GP function');
xlabel('Input');
ylabel('Output');
% For each of the algorithms, we plot the results.
pointsUsed = [nmu,nmu,nm,nmu,nmu,nm,nm]; % The number of measurements each method used.
for i = 1:numMethods
makeGPPlot(i, xp, mupS(:,i,sample), sqrt(diag(SpS(:,:,i,sample))));
plot(xm(1:pointsUsed(i)), ym(1:pointsUsed(i)), 'ro');
plot(xp,yp,'b-');
title(['Case ',num2str(i)]);
xlabel('Input');
ylabel('Output');
if muuS(1,i,sample) ~= 0 % We only plot the inducing input points for the methods that actually use them.
plot(xu,muuS(:,i,sample),'ko');
end
end