-
Notifications
You must be signed in to change notification settings - Fork 17
/
STDBSCAN.py
69 lines (56 loc) · 2.96 KB
/
STDBSCAN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import math
from datetime import timedelta
from geopy.distance import great_circle
"""
INPUTS:
df={o1,o2,...,on} Set of objects
spatial_threshold = Maximum geographical coordinate (spatial) distance value
temporal_threshold = Maximum non-spatial distance value
min_neighbors = Minimun number of points within Eps1 and Eps2 distance
OUTPUT:
C = {c1,c2,...,ck} Set of clusters
"""
def ST_DBSCAN(df, spatial_threshold, temporal_threshold, min_neighbors):
cluster_label = 0
NOISE = -1
UNMARKED = 777777
stack = []
# initialize each point with unmarked
df['cluster'] = UNMARKED
# for each point in database
for index, point in df.iterrows():
if df.loc[index]['cluster'] == UNMARKED:
neighborhood = retrieve_neighbors(index, df, spatial_threshold, temporal_threshold)
if len(neighborhood) < min_neighbors:
df.at[index, 'cluster'] = NOISE
else: # found a core point
cluster_label = cluster_label + 1
df.at[index, 'cluster'] = cluster_label# assign a label to core point
for neig_index in neighborhood: # assign core's label to its neighborhood
df.at[neig_index, 'cluster'] = cluster_label
stack.append(neig_index) # append neighborhood to stack
while len(stack) > 0: # find new neighbors from core point neighborhood
current_point_index = stack.pop()
new_neighborhood = retrieve_neighbors(current_point_index, df, spatial_threshold, temporal_threshold)
if len(new_neighborhood) >= min_neighbors: # current_point is a new core
for neig_index in new_neighborhood:
neig_cluster = df.loc[neig_index]['cluster']
if (neig_cluster != NOISE) & (neig_cluster == UNMARKED):
# TODO: verify cluster average before add new point
df.at[neig_index, 'cluster'] = cluster_label
stack.append(neig_index)
return df
def retrieve_neighbors(index_center, df, spatial_threshold, temporal_threshold):
neigborhood = []
center_point = df.loc[index_center]
# filter by time
min_time = center_point['date_time'] - timedelta(minutes = temporal_threshold)
max_time = center_point['date_time'] + timedelta(minutes = temporal_threshold)
df = df[(df['date_time'] >= min_time) & (df['date_time'] <= max_time)]
# filter by distance
for index, point in df.iterrows():
if index != index_center:
distance = great_circle((center_point['latitude'], center_point['longitude']), (point['latitude'], point['longitude'])).meters
if distance <= spatial_threshold:
neigborhood.append(index)
return neigborhood