-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadaptive_k_mean.py
88 lines (73 loc) · 2.04 KB
/
adaptive_k_mean.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import sys
import numpy as np
from sklearn.cluster import KMeans, MiniBatchKMeans
from numpy import linalg as LA
import matplotlib.pyplot as plt
from cluster import Cluster, ClusterSet
# Mode switches. `is_load` selects which columns of the data file are read
# below; both flags are consulted again later in the script to choose a
# normalisation step.
is_load = True
is_voltage = False

# Command line: <data file> <starting K> <theta>
fileName, minK, theta = sys.argv[1], int(sys.argv[2]), float(sys.argv[3])

# The first line of the data file is parsed as the number of values that
# make up each row vector.
with open(fileName, 'r') as header:
    vectorLength = int(header.readline())

# With `is_load` set the vector starts at column 5, otherwise at column 1.
# Either way `vectorLength` consecutive columns are read and the header
# line is skipped.
firstColumn = 5 if is_load else 1
data = np.loadtxt(fileName,
                  skiprows=1,
                  usecols=range(firstColumn, firstColumn + vectorLength))

# Column 0 of every row.
# NOTE(review): the original indentation was lost; this load is assumed to
# run unconditionally rather than only in the non-load branch -- confirm.
ids = np.loadtxt(fileName, skiprows=1, usecols=[0])
def plot_cluster(cluster):
    """Draw every member of *cluster* plus its centroid and show the figure.

    Each entry of ``cluster.points`` is drawn as a black line and
    ``cluster.centroid`` is overlaid as hollow red circles.  The figure is
    titled with ``cluster.label`` and displayed via ``plt.show()``.
    """
    for profile in cluster.points:
        plt.plot(profile, color='black')

    # Hollow red circle markers keep the centroid visible on top of the
    # black member curves.
    centroid_style = dict(markerfacecolor='None',
                          markeredgewidth=2,
                          markeredgecolor='red')
    plt.plot(cluster.centroid, 'o', **centroid_style)

    plt.xlabel('Hour')
    plt.ylabel('Normal Usage')
    heading = '#' + str(cluster.label)
    plt.title(heading)
    plt.show()
K = minK

# Initial centroid matrix.  It is not read anywhere in the visible script.
# The builtin ``float`` is used because the ``np.float`` alias was removed
# from NumPy in 1.24 and now raises AttributeError.
centroids = np.zeros([K, vectorLength], dtype=float)

clusterSet = ClusterSet(data)
if is_load:
    clusterSet.normalize()
if is_voltage:
    clusterSet.voltage_normalize()

# Adaptive loop: fit with the current K, ask the cluster set which labels
# violate `theta`, and stop as soon as there are none.  Otherwise K grows
# by the number of violating labels, each of them is split, and the data
# is fitted again.
while True:
    clusterSet.fitData(K)
    n_v = clusterSet.findViolations(theta)
    if len(n_v) == 0:
        break
    K += len(n_v)
    for label in n_v:
        clusterSet.splitLabel(label)

# Report the size of every final cluster.
for cluster in clusterSet.clusterMap.values():
    print(len(cluster.points))

# Plot the smallest cluster.
smallestLabel = clusterSet.smallestCluster()
smallest = clusterSet.getCluster(smallestLabel)
# Single-argument print() calls produce the same text under Python 2 and
# Python 3; the doubled space reproduces the output of the original
# two-item print statements.
print("Total clusters:  %s" % K)
print("Smallest cluster size:  %s" % len(smallest.points))
plot_cluster(smallest)

# Output layout: the first line is the total number of clusters, then for
# every cluster one line with its space-separated centroid values followed
# by one line with its size.  The context manager closes the file even if
# a write or a centroid lookup raises.
with open("adaptive_k_centers.txt", "w") as f:
    f.write(str(K) + '\n')
    for label in clusterSet.clusterMap.keys():
        cluster = clusterSet.getCluster(label)
        centroid = cluster.centroid
        # Center for the cluster.
        f.write(' '.join(str(centroid[i]) for i in range(vectorLength)) + '\n')
        # Cluster size.
        f.write(str(len(cluster.points)) + '\n')