-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpca.py
54 lines (36 loc) · 1.46 KB
/
pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import sys
from utils import reader
import numpy as np
from scipy.linalg import eigh
class Model(object):
    """Principal component analysis that keeps the top ``k`` directions.

    Instance fields:
        k   -- number of principal components kept by ``pcaFun``.
        pca -- False until ``pcaFun`` has completed once, True afterwards.
    """

    def __init__(self, k):
        """Remember the target dimensionality ``k``; nothing is computed yet."""
        self.k = k
        self.pca = False

    def pcaFun(self, x):
        """Project the examples in ``x`` onto their top ``self.k`` components.

        ``x`` holds one example per row and one feature per column (an array
        or nested sequence of numbers); it is transposed internally and the
        caller's data is never modified.

        Each example is first normalised on its own: its mean over the
        features is subtracted and it is scaled to unit root-mean-square.
        The second-moment matrix ``S`` (features x features) is then averaged
        over the examples and the eigenvectors belonging to its ``k`` largest
        eigenvalues are used as the projection basis.

        Returns:
            Array with ``k`` rows and one column per example. Rows follow
            ascending eigenvalue order, so the last row is the strongest
            component.

        Raises:
            ValueError: if ``x`` is not a non-empty 2-D table, or if ``k`` is
                not between 1 and the number of features.
        """
        # np.array(..., dtype=float) always copies and yields a plain ndarray,
        # replacing np.float_ which no longer exists in NumPy 2.
        data = np.array(x, dtype=float).T  # features x examples
        if data.ndim != 2 or data.size == 0:
            raise ValueError("x must be a non-empty 2-D array (examples x features)")
        n_features, n_examples = data.shape
        if not 1 <= self.k <= n_features:
            raise ValueError(
                "k must be between 1 and the number of features (%d), got %r"
                % (n_features, self.k)
            )

        # Per-example normalisation: zero mean and unit RMS across features.
        data -= data.mean(axis=0)
        sigma = np.sqrt(np.mean(np.power(data, 2), axis=0))
        # A constant example is all zeros after centring; dividing by a zero
        # sigma would fill the column with NaN, so leave such columns as zeros.
        sigma[sigma == 0] = 1.0
        data /= sigma

        # Sum of the outer products of every example with itself, averaged
        # over the examples. (A dot product of two 1-D vectors would only
        # give a scalar, which is not the matrix needed here.)
        S = np.dot(data, data.T) / n_examples

        # eigh returns eigenvalues in ascending order, so the last k columns
        # are the top-k eigenvectors. Slicing avoids the eigvals= keyword,
        # which newer SciPy releases no longer accept.
        evectors = eigh(S)[1][:, n_features - self.k:]

        xReduced = np.dot(evectors.T, data)
        self.pca = True
        return xReduced
def main(argv):
    """Command-line entry point: read the training data and compress it.

    ``argv`` is the full argument vector with the program name first.
    ``argv[1]`` is the training-data path passed to ``reader.read``. A second
    path (test data) is required by the usage check but is not read yet.

    Prints a usage message and exits with status 1 when fewer than two data
    paths are supplied.

    Returns:
        The value returned by ``Model.pcaFun`` for the training features.
    """
    if len(argv) < 3:
        # Call form of print works on both Python 2 and Python 3.
        print("Usage: python pca.py <train_data> <test_data>")
        sys.exit(1)

    # NOTE(review): extractFeatures, extractLabel and LIMIT are not defined or
    # imported anywhere in the visible file; this call raises NameError until
    # they are provided -- confirm where they are supposed to come from.
    y, x = reader.read(argv[1], extractFeaturesFn=extractFeatures, extractLabelsFn=extractLabel, limit=LIMIT)
    # TODO: the test set (argv[2]) is not loaded yet.

    k = 3
    model = Model(k)
    # Model.pca is a boolean status flag; the method doing the work is pcaFun.
    compressedFeatures = model.pcaFun(x)
    return compressedFeatures
# Run the PCA command-line flow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main(sys.argv)