Skip to content

Commit 63e4fb6

Browse files
author
Lucas van Walstijn
committed
initial commit
0 parents  commit 63e4fb6

11 files changed

+779
-0
lines changed

1.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
4+
# Fit a straight line (ordinary least squares) to the first spectrum in
# quasar_train.csv and plot the raw data together with the fitted line.

# Row 0 of the training CSV is used as the x-axis values (plotted below as
# "Wavelength"); every following row is one spectrum.
trainCSV = np.genfromtxt('quasar_train.csv', delimiter=",")
lambdas = trainCSV[0, :]
train = trainCSV[1:, :]

mm = lambdas.size

# Only the first training spectrum is fitted.  A 1-D target is all the
# algebra below needs, so no reshape is performed (the original reshape
# call discarded its result and had no effect).
y = train[0, :]

# Design matrix: an intercept column of ones next to the wavelength column.
x1 = np.ones((mm, 1))
x2 = lambdas.reshape((mm, 1))
X = np.hstack((x1, x2))

# Normal equations (X^T X) theta = X^T y, solved directly instead of forming
# the explicit inverse, which is slower and numerically less accurate.
XtX = X.T.dot(X)
theta = np.linalg.solve(XtX, X.T.dot(y))

plt.figure(1, figsize=(8, 4))
plt.scatter(x2, y, marker="x", c="red", s=2)
plt.ylabel('Intensity')
plt.xlabel("Wavelength")
plt.title("Some noisy spectrum with an OLS regression")
plt.plot(x2, X.dot(theta), "b-")
plt.show()

2.py

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
from weight import weight
4+
5+
# Smooth the first spectrum in quasar_train.csv with locally weighted linear
# regression and plot the raw data together with the smoothed curve.

# Row 0 of the training CSV is used as the x-axis values (plotted below as
# "Wavelength"); every following row is one spectrum.
trainCSV = np.genfromtxt('quasar_train.csv', delimiter=",")
lambdas = trainCSV[0, :]
train = trainCSV[1:, :]

mm = lambdas.size

# Only the first training spectrum is smoothed.  The 1-D target is used as
# is (the original reshape call discarded its result and had no effect).
y = train[0, :]

# Design matrix: an intercept column of ones next to the wavelength column.
x1 = np.ones((mm, 1))
x2 = lambdas.reshape((mm, 1))
X = np.hstack((x1, x2))

# Bandwidth handed to the project-local weight() helper.
tau = 5

y_hat = np.zeros(mm)
for i in range(mm):
    # NOTE(review): weight() lives in weight.py and is not visible here; the
    # products below require it to return an (mm, mm) matrix - confirm.
    W = weight(x2, x2[i], tau)
    XtW = X.T.dot(W)  # shared by both sides of the weighted normal equations
    # Solve (X^T W X) theta = X^T W y without forming an explicit inverse.
    theta_w = np.linalg.solve(XtW.dot(X), XtW.dot(y))
    # Only the prediction at the query point itself is kept.
    y_hat[i] = X[i].dot(theta_w)

plt.figure(1, figsize=(8, 4))
plt.ylabel('Intensity')
plt.xlabel("Wavelength")
plt.title("Smoothing some spectral data with WLS regression")
# Both artists carry a label so that plt.legend() has entries to show.
plt.scatter(x2, y, marker="x", c="red", s=2, label="Raw data")
plt.plot(x2, y_hat, "b-", label="WLS fit (tau = {0})".format(tau))
plt.legend()
plt.show()

3.py

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
from weight import weight
4+
5+
# Smooth every spectrum with locally weighted linear regression and write
# the results to CSV.
#
# The weighted fit at a given wavelength depends only on the wavelength grid
# and tau, not on the spectrum being smoothed.  The linear "smoother" matrix
# is therefore built once (one small solve per wavelength) and applied to all
# spectra with a single matrix product, instead of repeating every solve for
# every spectrum.

# Row 0 of the training CSV holds the wavelength grid; the remaining rows
# are spectra.  The test CSV is read with its first row skipped.
trainCSV = np.genfromtxt('quasar_train.csv', delimiter=",")
lambdas = trainCSV[0, :]
train = trainCSV[1:, :]
test = np.genfromtxt('quasar_test.csv', delimiter=",", skip_header=1)

mm = lambdas.size

# Design matrix: an intercept column of ones next to the wavelength column.
x1 = np.ones((mm, 1))
x2 = lambdas.reshape((mm, 1))
X = np.hstack((x1, x2))

# Bandwidth handed to the project-local weight() helper.
tau = 5

# Row i of `smoother` maps a raw spectrum y to its smoothed value at
# wavelength i:  y_hat[i] = X[i] (X^T W_i X)^-1 X^T W_i y.
smoother = np.zeros((mm, mm))
for i in range(mm):
    # NOTE(review): weight() lives in weight.py and is not visible here; the
    # products below require it to return an (mm, mm) matrix - confirm.
    W = weight(x2, x2[i], tau)
    XtW = X.T.dot(W)
    smoother[i] = X[i].dot(np.linalg.solve(XtW.dot(X), XtW))
    # Progress is reported per wavelength, since this loop is now the only
    # expensive part of the script.
    print("{0}% complete".format(i/mm*100))

# Apply the smoother to every spectrum at once (one spectrum per row).
test_smooth = test.dot(smoother.T)
np.savetxt("quasar_test_smooth.csv", test_smooth, delimiter=",")

# 4.py and 5.py read quasar_train_smooth.csv as well, so it is written here
# using the same smoother.
train_smooth = train.dot(smoother.T)
np.savetxt("quasar_train_smooth.csv", train_smooth, delimiter=",")

4.py

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
from weight import weight
4+
from distance import getDistanceMatrix
5+
from distance import getDistanceMatrixTrainTest
6+
from distance import distance
7+
8+
def ker(t):
    """Return the kernel value max(1 - t, 0).

    Works element-wise, so ``t`` may be a scalar or a NumPy array; scalar
    input yields the same value as before.
    """
    return np.maximum(1 - t, 0)
10+
11+
# Estimate the left part of each smoothed spectrum from its right part with
# a kernel-weighted average over the nearest training spectra, report the
# average error on the training and test sets, and plot one test example.

def _predict_left(distance_rows, left_targets, k):
    """Kernel-weighted average of the left parts of the k nearest spectra.

    distance_rows[i, j] is the distance between query spectrum i and training
    spectrum j; left_targets holds one training left part per row.  Returns
    one estimated left part per query spectrum.
    """
    predictions = np.zeros((distance_rows.shape[0], left_targets.shape[1]))
    for i, row in enumerate(distance_rows):
        # argsort places NaN entries last, so the NaN diagonal written by
        # getDistanceMatrix keeps a spectrum from being its own neighbour.
        neighbors = row.argsort()[:k]
        # Normalise by the largest (non-NaN) distance in the row.
        h = np.nanmax(row)
        kernels = np.array([ker(d / h) for d in row[neighbors]])
        predictions[i] = kernels.dot(left_targets[neighbors]) / kernels.sum()
    return predictions


# Row 0 of the raw training CSV holds the wavelength grid.
trainCSV = np.genfromtxt('quasar_train.csv', delimiter=",")
lambdas = trainCSV[0, :]
lambdas = lambdas.reshape((lambdas.shape[0], 1))

train = np.genfromtxt('quasar_train_smooth.csv', delimiter=",")
test = np.genfromtxt('quasar_test_smooth.csv', delimiter=",")

# Positions of the wavelengths 1300 and 1200 in the grid.
# NOTE(review): relies on exact float equality and on both values being
# present in the grid; an IndexError is raised otherwise.
right_index = np.where(lambdas == 1300)[0][0]
left_index = np.where(lambdas == 1200)[0][0]

# "Right" runs from the 1300 entry to the end of the grid, "left" from the
# start up to (not including) the 1200 entry.
lambdas_right = lambdas[right_index:, :]
lambdas_left = lambdas[:left_index, :]

train_right = train[:, right_index:]
test_right = test[:, right_index:]
train_left = train[:, :left_index]
test_left = test[:, :left_index]

neighborhood_size = 3

# Training set: neighbours are looked up among the other training spectra.
train_right_distance = getDistanceMatrix(train_right)
train_left_hat = _predict_left(train_right_distance, train_left, neighborhood_size)
error = np.array([distance(actual, estimate)
                  for actual, estimate in zip(train_left, train_left_hat)])

print(np.average(error))

# Test set: neighbours are looked up among the training spectra.
distanceM = getDistanceMatrixTrainTest(train_right, test_right)
test_left_hat = _predict_left(distanceM, train_left, neighborhood_size)
error = np.array([distance(actual, estimate)
                  for actual, estimate in zip(test_left, test_left_hat)])

print(np.average(error))

plt.figure(1, figsize=(4, 4))
plt.plot(lambdas, test[5], ".", label="true")
# The estimate gets its own legend entry (it was also labelled "true").
plt.plot(lambdas_left, test_left_hat[5], ".", label="predicted")
plt.legend()
plt.show()
79+
80+

5.py

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
from weight import weight
4+
from distance import getDistanceMatrix
5+
from distance import getDistanceMatrixTrainTest
6+
from distance import distance
7+
8+
def ker(t):
    """Return the kernel value max(1 - t, 0).

    Works element-wise, so ``t`` may be a scalar or a NumPy array; scalar
    input yields the same value as before.
    """
    return np.maximum(1 - t, 0)
10+
11+
# Estimate the left part of each smoothed test spectrum from its right part
# with a kernel-weighted average over the nearest training spectra, report
# the average error, and plot the first test example.

def _predict_left(distance_rows, left_targets, k):
    """Kernel-weighted average of the left parts of the k nearest spectra.

    distance_rows[i, j] is the distance between query spectrum i and training
    spectrum j; left_targets holds one training left part per row.  Returns
    one estimated left part per query spectrum.
    """
    predictions = np.zeros((distance_rows.shape[0], left_targets.shape[1]))
    for i, row in enumerate(distance_rows):
        neighbors = row.argsort()[:k]
        # Normalise by the largest (non-NaN) distance in the row.
        h = np.nanmax(row)
        kernels = np.array([ker(d / h) for d in row[neighbors]])
        predictions[i] = kernels.dot(left_targets[neighbors]) / kernels.sum()
    return predictions


# Row 0 of the raw training CSV holds the wavelength grid.
trainCSV = np.genfromtxt('quasar_train.csv', delimiter=",")
lambdas = trainCSV[0, :]
lambdas = lambdas.reshape((lambdas.shape[0], 1))

train = np.genfromtxt('quasar_train_smooth.csv', delimiter=",")
test = np.genfromtxt('quasar_test_smooth.csv', delimiter=",")

# Positions of the wavelengths 1300 and 1200 in the grid.
# NOTE(review): relies on exact float equality and on both values being
# present in the grid; an IndexError is raised otherwise.
right_index = np.where(lambdas == 1300)[0][0]
left_index = np.where(lambdas == 1200)[0][0]

# "Right" runs from the 1300 entry to the end of the grid, "left" from the
# start up to (not including) the 1200 entry.
lambdas_right = lambdas[right_index:, :]
lambdas_left = lambdas[:left_index, :]

train_right = train[:, right_index:]
test_right = test[:, right_index:]
train_left = train[:, :left_index]
test_left = test[:, :left_index]

neighborhood_size = 3

# Neighbours of each test spectrum are looked up among the training spectra.
distanceM = getDistanceMatrixTrainTest(train_right, test_right)
test_left_hat = _predict_left(distanceM, train_left, neighborhood_size)
error = np.array([distance(actual, estimate)
                  for actual, estimate in zip(test_left, test_left_hat)])

print(np.average(error))

plt.figure(1, figsize=(4, 4))
plt.plot(lambdas, test[0], ".", label="true")
# The estimate gets its own legend entry (it was also labelled "true").
plt.plot(lambdas_left, test_left_hat[0], ".", label="predicted")
plt.legend()
plt.show()
58+
59+

distance.py

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import numpy as np
2+
3+
def distance(f1, f2):
    """Return the squared Euclidean distance between f1 and f2.

    Both arguments may be NumPy arrays or plain sequences of equal length.
    The result is the sum of squared differences; no square root is taken.
    """
    diff = np.asarray(f1) - np.asarray(f2)
    return diff.T.dot(diff)
6+
7+
def getDistanceMatrix(X):
    """Return the pairwise squared-distance matrix of the rows of X.

    Entry [i, j] is the sum of squared differences between rows i and j.
    The diagonal is NaN so that a row is never reported as its own nearest
    neighbour (argsort places NaN last, nanmax ignores it).
    """
    X = np.asarray(X, dtype=float)
    mm = X.shape[0]
    distanceMatrix = np.empty((mm, mm))
    for i in range(mm):
        # One vectorized pass gives the distances from row i to every row.
        diff = X - X[i]
        distanceMatrix[i] = np.sum(diff * diff, axis=1)
        distanceMatrix[i, i] = np.nan
    return distanceMatrix
19+
20+
def getDistanceMatrixTrainTest(train, test):
    """Return squared distances between every test row and every train row.

    The result has shape (number of test rows, number of train rows); entry
    [i, j] is the sum of squared differences between test row i and train
    row j.
    """
    train = np.asarray(train, dtype=float)
    test = np.asarray(test, dtype=float)
    mtrain = train.shape[0]
    mtest = test.shape[0]
    distanceMatrix = np.zeros((mtest, mtrain))
    for i in range(mtest):
        # One vectorized pass gives the distances from test row i to all
        # training rows.
        diff = train - test[i]
        distanceMatrix[i] = np.sum(diff * diff, axis=1)
    return distanceMatrix

0 commit comments

Comments
 (0)