@@ -24,21 +24,19 @@ vector<int> k_random_selection(int len, int k) {
24
24
return pick_random_points;
25
25
}
26
26
27
- void k_means_pp (vector<int > &centroids_ind, vector< const Curve*> &centroids, int len, int k, const char *dist_func) {
27
+ void k_means_pp (vector<const Curve*> &centroids, int len, int k, const char *dist_func) {
28
28
vector<double > min_distance (len, -1 );
29
29
vector<bool > is_centroid (len, false );
30
30
int pos;
31
31
double max_sum = 0 ;
32
32
33
- centroids_ind.reserve (k);
34
33
centroids.reserve (k);
35
34
36
35
for (int t = 0 ; ; ++t) {
37
36
if (!t) {
38
37
pos = rand () % len;
39
38
40
39
is_centroid[pos] = true ;
41
- centroids_ind.push_back (pos);
42
40
centroids.push_back (&input_curves[pos]);
43
41
}
44
42
@@ -51,7 +49,7 @@ void k_means_pp(vector<int> &centroids_ind, vector<const Curve*> &centroids, int
51
49
continue ;
52
50
}
53
51
54
- double dist = compute_distance (i, pos, dist_func);
52
+ double dist = compute_distance (input_curves[i], input_curves[ pos] , dist_func);
55
53
56
54
if (min_distance[i] == -1 || dist < min_distance[i]) {
57
55
if (min_distance[i] != -1 ) {
@@ -80,15 +78,14 @@ void k_means_pp(vector<int> &centroids_ind, vector<const Curve*> &centroids, int
80
78
}
81
79
82
80
is_centroid[pos] = true ;
83
- centroids_ind.push_back (pos);
84
81
centroids.push_back (&input_curves[pos]);
85
82
86
83
max_sum -= min_distance[pos] * min_distance[pos];
87
84
}
88
85
}
89
86
90
- double loyd_assignment (const vector<int > &centroids, vector<int > &assign, vector<double > &close_dist, vector< double > &close_dist_sec, vector<vector< int > > &clusters, char *metric ) {
91
- double min_dist, min_dist_sec, dist, value = 0 ;
87
+ double loyd_assignment (const vector<const Curve* > &centroids, vector<vector<int > > &clusters) {
88
+ double min_dist, dist, value = 0 ;
92
89
int p_centr;
93
90
94
91
for (int i = 0 ; i < (int )clusters.size (); ++i) {
@@ -99,67 +96,104 @@ double loyd_assignment(const vector<int> &centroids, vector<int> &assign, vector
99
96
min_dist = -1 ;
100
97
101
98
for (int j = 0 ; j < (int )centroids.size (); ++j) {
102
- dist = compute_distance (i, centroids[j], metric );
99
+ dist = compute_distance (input_curves[i], * centroids[j], " DFT " );
103
100
104
101
if (min_dist == -1 || dist < min_dist) {
105
- min_dist_sec = min_dist;
106
102
min_dist = dist;
107
103
p_centr = j;
108
- } else if (min_dist_sec == -1 || dist < min_dist_sec) {
109
- min_dist_sec = dist;
110
104
}
111
105
}
112
-
106
+
113
107
clusters[p_centr].push_back (i);
114
- assign[i] = centroids[p_centr];
115
-
116
- close_dist[i] = min_dist;
117
- close_dist_sec[i] = min_dist_sec;
118
-
119
108
value += min_dist;
120
109
}
121
110
122
111
return value;
123
112
}
124
113
125
- double loyd_assignment (const vector<const Curve*> &centroids, vector<vector<int > > &clusters) {
126
- double min_dist, dist, value = 0 ;
127
- int p_centr;
114
+ vector<int > range_search (const vector<HashTable> &hashtables, const vector<Curve> ¢roids, int dim, int L, int k, double delta) {
115
+ vector<int > assignment ((int )input_curves.size (), -1 );
116
+ vector<set<Curve> > R_closest_curves ((int )centroids.size ());
117
+ vector<bool > grid_curves_found ((int )centroids.size (), false ), visited ((int )input_curves.size (), false );
118
+ double minim = -1 ;
128
119
129
- for (int i = 0 ; i < (int )clusters.size (); ++i) {
130
- clusters[i].clear ();
120
+ for (int i = 0 ; i < (int )centroids.size (); ++i) {
121
+ centroids[i].print_curve ();
122
+
123
+ for (int j = i + 1 ; j < (int )centroids.size (); ++j) {
124
+ double dist = compute_distance (centroids[i], centroids[j], " DFT" );
125
+
126
+ if (minim == -1 || dist < minim) {
127
+ minim = dist;
128
+ }
129
+ }
131
130
}
132
131
133
- for (int i = 0 ; i < (int )input_curves.size (); ++i) {
134
- min_dist = -1 ;
132
+ double R = minim / 2.0 ;
133
+ cout << " R = " << R << endl;
134
+
135
+ while (1 ) {
136
+ general_search (hashtables, L, delta, k, R, " classic" , " DFT" , R_closest_curves, centroids, grid_curves_found, visited);
137
+ bool found = false ;
138
+
139
+ for (int i = 0 ; i < (int )R_closest_curves.size (); ++i) {
140
+ for (set<Curve>::const_iterator itr = R_closest_curves[i].begin (); itr != R_closest_curves[i].end (); ++itr) {
141
+ int id = itr->get_int_id ();
142
+
143
+ itr->print_curve ();
144
+
145
+ if (assignment[id] == -1 ) {
146
+ assignment[id] = i;
147
+ found = true ;
148
+ } else {
149
+ int dist_1 = compute_distance (*itr, centroids[i], " DFT" );
150
+ int dist_2 = compute_distance (*itr, centroids[assignment[id]], " DFT" );
151
+
152
+ if (dist_1 < dist_2) {
153
+ assignment[id] = i;
154
+ found = true ;
155
+ }
156
+ }
157
+ }
158
+ }
135
159
136
- for (int j = 0 ; j < (int )centroids.size (); ++j) {
137
- dist = discrete_frechet_distance (input_curves[i], *centroids[j]);
160
+ if (!found) {
161
+ break ;
162
+ }
138
163
139
- if (min_dist == -1 || dist < min_dist) {
140
- min_dist = dist;
141
- p_centr = j;
164
+ R *= 2 ;
165
+ }
166
+
167
+ // assignment for long points
168
+ for (int i = 0 ; i < (int )assignment.size (); ++i) {
169
+ if (assignment[i] == -1 ) {
170
+ double minim = -1 ;
171
+
172
+ for (int j = 0 ; j < (int )centroids.size (); ++j) {
173
+ double dist = compute_distance (input_curves[i], centroids[j], " DFT" );
174
+
175
+ if (minim == -1 || dist < minim) {
176
+ minim = dist;
177
+ assignment[i] = j;
178
+ }
142
179
}
143
180
}
144
-
145
- clusters[p_centr].push_back (i);
146
- value += min_dist;
147
181
}
148
-
149
- return value ;
182
+
183
+ return assignment ;
150
184
}
151
185
152
- double swap_update_centroid (int old_centr, int new_centr, const vector<int > &assign, const vector<double > &close_dist, const vector<double > &close_dist_sec, char *metric ) {
186
+ double swap_update_centroid (const vector< const Curve*> &centroids, int old_centr, int new_centr, const vector<int > &assign, const vector<double > &close_dist, const vector<double > &close_dist_sec) {
153
187
double value = 0 ;
154
188
155
189
for (int i = 0 ; i < (int )assign.size (); ++i) {
156
- if ((i != old_centr && (assign[i] == i)) || i == new_centr) {
190
+ if ((i != old_centr && (centroids[ assign[i]]-> get_int_id () == i)) || i == new_centr) {
157
191
continue ;
158
192
}
159
193
160
- double dist = compute_distance (i, new_centr, metric );
194
+ double dist = compute_distance (input_curves[i], input_curves[ new_centr], " DFT " );
161
195
162
- if (assign[i] != old_centr) {
196
+ if (centroids[ assign[i]]-> get_int_id () != old_centr) {
163
197
value += min (dist, close_dist[i]);
164
198
} else {
165
199
value += min (dist, close_dist_sec[i]);
@@ -169,25 +203,45 @@ double swap_update_centroid(int old_centr, int new_centr, const vector<int> &ass
169
203
return value;
170
204
}
171
205
172
- bool PAM_update (vector<int > &centroids, const vector<int > &assign, const vector<double > &close_dist, const vector<double > &close_dist_sec, double value, const vector<int > &cluster, int p_clust, char *metric) {
206
+ bool PAM_update (vector<const Curve*> &centroids, double value, const vector<vector<int > > &clusters) {
207
+ vector<double > close_dist ((int )input_curves.size (), -1 ), close_dist_sec ((int )input_curves.size (), -1 );
208
+ vector<int > assign ((int )input_curves.size ());
209
+
173
210
double min_value = value;
174
- int new_cent = - 1 ;
211
+ int p_new_cent, p_clust ;
175
212
176
- for (int i = 0 ; i < (int )cluster.size (); ++i) {
177
- if (cluster[i] == centroids[p_clust]) {
178
- continue ;
213
+ for (int i = 0 ; i < (int )input_curves.size (); ++i) {
214
+ for (int j = 0 ; j < (int )centroids.size (); ++j) {
215
+ double dist = compute_distance (input_curves[i], *centroids[j], " DFT" );
216
+
217
+ if (close_dist[i] == -1 || dist < close_dist[i]) {
218
+ close_dist_sec[i] = close_dist[i];
219
+ close_dist[i] = dist;
220
+ assign[i] = j;
221
+ } else if (close_dist_sec[i] == -1 || close_dist_sec[i] < dist) {
222
+ close_dist_sec[i] = dist;
223
+ }
179
224
}
180
-
181
- double new_value = swap_update_centroid (centroids[p_clust], cluster[i], assign, close_dist, close_dist_sec, metric);
225
+ }
226
+
227
+ for (int i = 0 ; i < (int )clusters.size (); ++i) {
228
+ for (int j = 0 ; j < (int )clusters[i].size (); ++j) {
229
+ if (clusters[i][j] == centroids[i]->get_int_id ()) {
230
+ continue ;
231
+ }
232
+
233
+ double new_value = swap_update_centroid (centroids, centroids[i]->get_int_id (), clusters[i][j], assign, close_dist, close_dist_sec);
182
234
183
- if (new_value < min_value) {
184
- min_value = new_value;
185
- new_cent = cluster[i];
235
+ if (new_value < min_value) {
236
+ min_value = new_value;
237
+ p_new_cent = clusters[i][j];
238
+ p_clust = i;
239
+ }
186
240
}
187
241
}
188
-
242
+
189
243
if (value > min_value) {
190
- centroids[p_clust] = new_cent ;
244
+ centroids[p_clust] = &input_curves[p_new_cent] ;
191
245
return true ;
192
246
}
193
247
@@ -202,34 +256,31 @@ bool mean_frechet_update(vector<const Curve*> &centroids, const vector<vector<in
202
256
const Curve *mean_curve = tree.get_mean ();
203
257
204
258
if (!centroids[i]->equal_curves (*mean_curve)) {
205
- centroids[i] = mean_curve;
259
+ centroids[i] = mean_curve;
206
260
check = true ;
207
261
}
208
262
}
209
263
210
264
return check;
211
265
}
212
266
213
- void clustering (char *metric ) {
267
+ void clustering (const vector<HashTable> &hashtables, int L, int k, double delta ) {
214
268
vector<const Curve*> centroids;
215
- vector<int > assignment (input_curves.size ()), centroids_ind ;
269
+ vector<int > assignment (input_curves.size ());
216
270
vector<double > close_dist (input_curves.size ()), close_dist_sec (input_curves.size ());
217
271
bool check;
218
272
double value;
219
273
int num_of_clusters = 2 ;
220
-
221
- k_means_pp (centroids_ind, centroids, input_curves.size (), num_of_clusters, metric );
274
+
275
+ k_means_pp (centroids, input_curves.size (), num_of_clusters, " DFT " );
222
276
cout << " initialization ended" << endl;
223
277
224
278
vector<vector<int > > clusters (num_of_clusters);
225
279
226
280
do {
227
- for (int i = 0 ; i < num_of_clusters; ++i) {
228
- // value = loyd_assignment(centroids_ind, assignment, close_dist, close_dist_sec, clusters);
229
- value = loyd_assignment (centroids, clusters);
230
- // check = PAM_update(centroids_ind, assignment, close_dist, close_dist_sec, value, clusters[i], i, metric);
231
- check = mean_frechet_update (centroids, clusters);
232
- }
281
+ value = loyd_assignment (centroids, clusters);
282
+ // check = PAM_update(centroids, value, clusters);
283
+ check = mean_frechet_update (centroids, clusters);
233
284
} while (check);
234
285
235
286
double min_s = -1 , max_s = -1 ;
0 commit comments