-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathFastMap.py
318 lines (266 loc) · 12.8 KB
/
FastMap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
#!/usr/bin/env python
# encoding: utf-8
'''
Machine Learning Algorithm Name: Fast Map
This is a sample program to demonstrate the implementation of FastMap
@author: Cheng-Lin Li a.k.a. Clark
@copyright: 2017 Cheng-Lin Li@University of Southern California. All rights reserved.
@license: Licensed under the GNU v3.0. https://www.gnu.org/licenses/gpl.html
@contact: clark.cl.li@gmail.com
@version: 1.0
@create: October, 7, 2016
@updated: February, 15, 2017
A FastMap class is implemented and provide a lot of value sets for reference.
self.o_pair = An array, the original data set.
self.dist = An array, the original destination information between each object
self.k = An integer, the target dimension
self.obj_k_d = An array, the result: Object distances scale to k dimension.
self._col = An integer, current processing dimension
self._max_dist = A float, keep current max. distance of Oa, Ob in current dimension to speed up the performance.
self._new_dist_set = An array, the new destination information of current processing dimension between each object
self._max_Oa = An index information of Oa for max. distance for current dimension.
self._max_Ob = An index information of Ob for max. distance for current dimension.
self.obj_set = A set() data type, to store the label of total objects.
'''
import numpy as np
import matplotlib.pyplot as plt
def getInputData(filename):
# Get data from data file and split data into two parts.
#1. Object pair set: All object relationships store in this set.
#2. Distance set: All distances are store in this set according to the index of Object pair set.
_object_pair = np.array
_distance = np.array
_data = np.genfromtxt(filename, delimiter='\t')
_object_pair = np.array(_data[0:, 0:2])
_distance = np.array(_data[0:,2])
return _object_pair, _distance
def getObjectName(filename):
# Get data from data file and split data into two parts.
#1. Object pair set: All object relationships store in this set.
#2. Distance set: All distances are store in this set according to the index of Object pair set.
_label_set = []
with open(filename) as f:
_label_set = f.read().splitlines()
return _label_set
class FastMap():
'''
classdocs
A FastMap class is implemented and provide a lot of value sets for reference.
This implementation is base on the reference:
Faloutsos, Christos, and King-Ip Lin. FastMap: A fast algorithm for indexing, data-mining and visualization of traditional and multimedia datasets. Vol. 24. No. 2. ACM, 1995.
[Optimized]
Except first dimension the program select the pivot objects from existing distance information base on reference paper.
For rest of iterations, the program gets the maximum distance and pivot objects directly from self.max_dist, self.Oa, self.Ob that were computed during the "projection_on_hyper_plane()" step.
function lists:
1. set_obj_set(object_pair): Return self.obj_set
Get object id from data input
2. choose_distance_objects(object_pair, distance_set): Return _Oa, _Ob, _max_dist
Choose farthest distance and pair of points (pivot objects) between objects.
3. get_max_distance (object_pair, distance_set, Obj): Return _farthest_O, _max_distance
Base on input object, "Obj", get farthest distance object then return.
4. get_distance (object_pair, distance_set, xi, xj): Return distance(float)
Get distance from distance set base on matching objects (xi, xj) in object pair set.
5. calculate_projection_distance(object_pair, distance_set, Oa, Ob): Return self.obj_k_d
Calculate project distance on the axis of pivot objects.
6. projection_on_hyper_plane(object_pair, distance_set, xi_set, col): Return _Oa, _Ob, _max_dist
Project all objects into new hyper plane
7. execute(object_pair, distance, dimension): Return self.obj_k_d
Execute FastMap algorithm base on input object pair, distance set, and target dimension.
8. plot(label_set):
Plot diagram base on label_set which ordered by the same sequence of object base on ID.
'''
def __init__(self, object_pair=None, distance=None, dimension=0):
'''
Constructor
'''
self.o_pair = np.array(object_pair)
self.dist = np.array(distance)
self.k = int(dimension) #Target dimension
self.obj_k_d = np.array #Objects project to K dimension results.
self._col = 0 #Current dimension
self._max_dist = float(0) #Keep current max. distance to speed up the performance.
self._new_dist_set = np.array
self._max_Oa = 0
self._max_Ob = 0
self.obj_set = set()
def set_obj_set(self, object_pair):
'''
Get object id from data input
'''
for _each_pair in object_pair:
self.obj_set.add(_each_pair[0])
self.obj_set.add(_each_pair[1])
self.obj_k_d = np.zeros((len(self.obj_set), self.k))
return self.obj_set
def choose_distance_objects(self, object_pair, distance_set):
'''
Choose farthest distance and pair of points (pivot objects) between objects.
[Optimized]
Except first dimension the program select the pivot objects from existing distance information base on reference paper.
For rest of iterations, we get the maximum distance and pivot objects directly from self.max_dist, self.Oa, self.Ob that were computed
during the "projection_on_hyper_plane()" step.
'''
Oa = 0
original_Oa = 0
Ob = object_pair[0][0]
dist_set = distance_set
pre_max_dist = 0
max_dist = 0
b_get_max = False #The boolean flag to identify the program get the max. distance or not.
if (self._col == 0):
while b_get_max is False:
Oa, max_dist = self.get_max_distance(object_pair, dist_set, Ob)
if ( max_dist >= pre_max_dist and original_Oa != Oa and max_dist != 0):
pre_max_dist = max_dist
original_Oa = Ob
Ob = Oa
else:
Oa = original_Oa
max_dist = pre_max_dist
b_get_max = True
self._max_Oa = Oa
self._max_Ob = Ob
self._max_dist = max_dist
else:
# Directly get pivot objects and max. distance from the "projection_on_hyper_plane()" step.
Oa = self._max_Oa
Ob = self._max_Ob
max_dist = self._max_dist
return Oa, Ob, max_dist
def get_max_distance (self, object_pair, distance_set, Obj):
'''
Base on input object, "Obj", get farthest distance object then return
'''
farthest_O = 0
max_distance = 0
for idx, _each_pair in enumerate(object_pair):
#Match object in each object pair
if _each_pair[0] == Obj :
if distance_set[idx] >= max_distance :
max_distance = distance_set[idx]
farthest_O = _each_pair[1]
else:
pass
elif _each_pair[1] == Obj :
if distance_set[idx] >= max_distance :
max_distance = distance_set[idx]
farthest_O = _each_pair[0]
else:
pass
else:
pass
return farthest_O, max_distance
def get_distance(self, object_pair, distance_set, xi, xj):
'''
Get distance from distance set base on matching objects (xi, xj) in object pair set.
'''
for idx, _each_pair in enumerate(object_pair):
#Match object in each object pair
if (xi == xj) :
return 0
elif (_each_pair[0] == xi and _each_pair[1] == xj) or (_each_pair[1] == xi and _each_pair[0] == xj) :
return distance_set[idx]
else:
pass
def calculate_projection_distance(self, object_pair, distance_set, Oa, Ob):
'''
Compute the projection distance on pivot objects (Oa, Ob) for each object.
'''
_xi = 0
dis_set = distance_set
obj_pair = object_pair
Dai = 0
Dab = self._max_dist
Dbi = 0
for _idx, _i in enumerate(self.obj_set):
# Calculate new distance xi and store as a new dimension into self.obj_k_d
Dai = self.get_distance(obj_pair, dis_set, Oa, _i)
Dbi = self.get_distance(obj_pair, dis_set, Ob, _i)
_xi = (Dai**2 + Dab**2 - Dbi**2) / (2 * Dab)
self.obj_k_d[_idx][int(self._col)]=_xi
return self.obj_k_d
def projection_on_hyper_plane(self, object_pair, distance_set, xi_set, col):
'''
Compute the distance of each object on new hyper plane.
[Optimized]
The max. distance, Oa, Ob will be kept during the computation for next iteration.
'''
_xi = 0
obj_pair = object_pair
new_dist_set =np.zeros((len(obj_pair), 1)) #New distance set in projected hyper-plane.
_xi = xi_set
col = col
D_Oij = 0
D_xij = 0
max_dist = 0
max_Oa = 0
max_Ob = 0
for _idx, _pair in enumerate(obj_pair) :
D_Oij = self.get_distance(object_pair, distance_set, _pair[0], _pair[1])
D_xij = abs(_xi[int(_pair[0]-1)][int(col)] - _xi[int(_pair[1]-1)][int(col)])
new_dist_set[_idx] = np.sqrt((D_Oij**2)-(D_xij**2))
if (new_dist_set[_idx] >= max_dist):
max_dist = new_dist_set[_idx]
max_Oa = _pair[0]
max_Ob = _pair[1]
self._new_dist_set = new_dist_set
self._max_dist = max_dist
self._max_Oa = max_Oa
self._max_Ob = max_Ob
return self._new_dist_set
def execute(self, object_pair, distance, dimension):
'''
Execute function to enable the calculation.
'''
self.o_pair = object_pair
self.dist = distance
self.k = dimension
Oa = 0
Ob = 0
max_dist = 0
dist_set = np.array(distance)
new_dist_set = np.array
col = 0 # temp. variable to point to the column of x array with k dimension.
self.obj_set = self.set_obj_set(object_pair)
while (col < self.k) :
Oa, Ob, _max_dist = self.choose_distance_objects(self.o_pair, dist_set)
if (_max_dist == 0) :
break
else:
self.calculate_projection_distance(self.o_pair, dist_set, Oa, Ob)
col += 1
self._col = col
if(col < self.k):
_new_dist_set = self.projection_on_hyper_plane(self.o_pair, dist_set, self.obj_k_d, col-1)
#Base on previous xi, xj data to compute new distance
_dist_set = _new_dist_set
else:
break
return self.obj_k_d
def plot(self, label_set):
plt.xlabel("x-axis")
plt.ylabel("y-axis")
for i in range(len(self.obj_k_d)):
_x = self.obj_k_d[i][0]
_y = self.obj_k_d[i][1]
_label = label_set[i]
plt.plot(_x, _y, 'b.', markersize=10)
plt.annotate(
_label, xy = (_x, _y), xytext = (30, 20), textcoords = 'offset points', ha = 'right', va = 'bottom',
bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.1),
arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))
plt.show()
if __name__ == '__main__':
'''
Main program.
For the FastMap class execution..
'''
object_pair, distance = getInputData('fastmap-data.txt')
#print ('object_pair=', object_pair)
#print ('distance=' , distance)
dimension = 2
fm = FastMap()
k_d_distance = fm.execute(object_pair, distance, dimension)
print ('The ', dimension, ' dimensions result = \n', k_d_distance)
label_set = getObjectName('fastmap-wordlist.txt')
fm.plot(label_set)