-
Notifications
You must be signed in to change notification settings - Fork 1
/
dbscan.py
124 lines (104 loc) · 3.31 KB
/
dbscan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# Algorithm
#
# DBSCAN(data, k)
# Arbitrary select a point p with label Unknown
# If p is a border point (non-core point), label it as NOISE:
# no points are density-reachable from p and DBSCAN visits the next point of the database.
# If p is a core point, a cluster is formed by
# Retrieving all points density-reachable (dr) from p w.r.t. Eps and MinPts:
# retrieve all ddr points from p, retrieve all ddr points of the ddr points,
# Labeling all retrieved points with the cluster label.
# Merge the current cluster to existing cluster if find any dr point that is already a member of a cluster
# Continue the process until all of the points have been processed.
#
import sys
sys.path.append(".\isclust")
import math
import msvcrt
import isclust
import random
import time
import sets
# Cluster data
# data: a list of objects. Each object is a list. The first attribute is the cluster label
#
def dbscan(data, Eps, MinPts, gnuplot):
clusters = []
clusterId = 1
isclust.plotDBSCAN(gnuplot, data, clusters, "X", "Y", 0, 10, 0, 10, "DBSCAN")
if( isclust.checkKey() ):
return []
while(1):
for p in data:
cluster,mergeList = findAllDrs(clusterId, p, data, Eps, MinPts)
if( len(cluster) > 0 ):
# Merge cluster
mergedCluster = cluster
mergeList = sets.Set(mergeList)
for cid in mergeList:
mergedCluster += clusters[cid-1]
for d in clusters[cid-1]:
d[0] = clusterId
clusters[cid-1] = []
# append the current cluster to the cluster set
clusters.append(mergedCluster)
clusterId += 1
isclust.plotDBSCAN(gnuplot, data, clusters, "X", "Y", 0, 10, 0, 10, "DBSCAN")
if( isclust.checkKey() ):
return []
if( isclust.checkKey() ):
return []
return clusters
# Find all density reachable points from a core point p
def findAllDrs(clusterId, p, data, Eps, MinPts):
if( p[0] > 0 ): # already assigned
return [],[]
ddrs,mergeList = findAllDdrs(clusterId, p, data, Eps, MinPts)
#print "ddrs", ddrs
ddrs2 = []
for q in ddrs:
d,m = findAllDrs(clusterId, q, data, Eps, MinPts)
ddrs += d
mergeList += m
return ddrs, mergeList
# find all directly density reachable points from a core point p
def findAllDdrs(clusterId, p, data, Eps, MinPts):
ddrs = []
mergeList = []
NptsNo = 0
# check if p is a core point
for i in data:
if(i is p):
continue
d = isclust.disimilarity(p,i)
if( d < Eps ):
NptsNo += 1
ddrs.append(i)
if( NptsNo >= MinPts ): # core point
cluster = []
p[0] = clusterId
for j in ddrs:
if( j[0] <= 0):
j[0] = clusterId
cluster.append(j)
else:
mergeList.append(j[0])
return cluster,mergeList
else:
return [],[]
if __name__ == '__main__':
gnuplot = isclust.getGnuPlot()
time.sleep(1)
sigma = 0.5
NumPoints = 20
db1 = isclust.genDataXY(NumPoints, sigma, 2, 2) # return a list of lists. Each list [label, x0, x1,....]
db2 = isclust.genDataXY(NumPoints, sigma, 6, 6)
db3 = isclust.genDataXY(NumPoints, sigma, 2, 6)
db = db1 + db2 + db3
#data = [[0,1,1], [0,1, 2], [0,1,1.5], [0, 5, 4], [0, 4.5, 4], [0, 4, 4.8]]
Eps = 1.5
MinPts = 3
centroids = dbscan(db, Eps, MinPts, gnuplot)
#Eps = 3
#MinPts = 5
# centroids = dbscan(db, Eps, MinPts, gnuplot)