Skip to content

Commit a1f9b2d

Browse files
committed
updates
1 parent 90e1e94 commit a1f9b2d

17 files changed

+617
-102
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
CC = g++
22
FLAGS = -g -Wall
33
OUT = run
4-
OBJS = main.o file_functions.o curve.o help_functions.o cluster.o distances.o binary_mean_tree.o
4+
OBJS = main.o file_functions.o curve.o help_functions.o cluster.o distances.o binary_mean_tree.o hash_functions.o hashtable.o list.o
55

66
run: $(OBJS)
77
$(CC) $(FLAGS) $^ -o $(OUT)

cluster.cpp

Lines changed: 112 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,19 @@ vector<int> k_random_selection(int len, int k) {
2424
return pick_random_points;
2525
}
2626

27-
void k_means_pp(vector<int> &centroids_ind, vector<const Curve*> &centroids, int len, int k, const char *dist_func) {
27+
void k_means_pp(vector<const Curve*> &centroids, int len, int k, const char *dist_func) {
2828
vector<double> min_distance(len, -1);
2929
vector<bool> is_centroid(len, false);
3030
int pos;
3131
double max_sum = 0;
3232

33-
centroids_ind.reserve(k);
3433
centroids.reserve(k);
3534

3635
for (int t = 0; ; ++t) {
3736
if (!t) {
3837
pos = rand() % len;
3938

4039
is_centroid[pos] = true;
41-
centroids_ind.push_back(pos);
4240
centroids.push_back(&input_curves[pos]);
4341
}
4442

@@ -51,7 +49,7 @@ void k_means_pp(vector<int> &centroids_ind, vector<const Curve*> &centroids, int
5149
continue;
5250
}
5351

54-
double dist = compute_distance(i, pos, dist_func);
52+
double dist = compute_distance(input_curves[i], input_curves[pos], dist_func);
5553

5654
if (min_distance[i] == -1 || dist < min_distance[i]) {
5755
if (min_distance[i] != -1) {
@@ -80,15 +78,14 @@ void k_means_pp(vector<int> &centroids_ind, vector<const Curve*> &centroids, int
8078
}
8179

8280
is_centroid[pos] = true;
83-
centroids_ind.push_back(pos);
8481
centroids.push_back(&input_curves[pos]);
8582

8683
max_sum -= min_distance[pos] * min_distance[pos];
8784
}
8885
}
8986

90-
double loyd_assignment(const vector<int> &centroids, vector<int> &assign, vector<double> &close_dist, vector<double> &close_dist_sec, vector<vector<int> > &clusters, char *metric) {
91-
double min_dist, min_dist_sec, dist, value = 0;
87+
double loyd_assignment(const vector<const Curve*> &centroids, vector<vector<int> > &clusters) {
88+
double min_dist, dist, value = 0;
9289
int p_centr;
9390

9491
for (int i = 0; i < (int)clusters.size(); ++i) {
@@ -99,67 +96,104 @@ double loyd_assignment(const vector<int> &centroids, vector<int> &assign, vector
9996
min_dist = -1;
10097

10198
for (int j = 0; j < (int)centroids.size(); ++j) {
102-
dist = compute_distance(i, centroids[j], metric);
99+
dist = compute_distance(input_curves[i], *centroids[j], "DFT");
103100

104101
if (min_dist == -1 || dist < min_dist) {
105-
min_dist_sec = min_dist;
106102
min_dist = dist;
107103
p_centr = j;
108-
} else if (min_dist_sec == -1 || dist < min_dist_sec) {
109-
min_dist_sec = dist;
110104
}
111105
}
112-
106+
113107
clusters[p_centr].push_back(i);
114-
assign[i] = centroids[p_centr];
115-
116-
close_dist[i] = min_dist;
117-
close_dist_sec[i] = min_dist_sec;
118-
119108
value += min_dist;
120109
}
121110

122111
return value;
123112
}
124113

125-
double loyd_assignment(const vector<const Curve*> &centroids, vector<vector<int> > &clusters) {
126-
double min_dist, dist, value = 0;
127-
int p_centr;
114+
// Assigns every input curve to a centroid and returns, per curve, the index of that centroid
// within `centroids`.
//  - hashtables, L, k, delta: forwarded unchanged to general_search.
//  - centroids: the current cluster centres.
//  - dim: not used in this function.
// The search radius starts at half the smallest pairwise "DFT" distance between centroids and is
// doubled after every round; rounds stop as soon as one of them assigns or reassigns nothing.
// Curves that are still unassigned afterwards are compared against every centroid directly.
// NOTE(review): the result is indexed by get_int_id() in the range-search phase and by position in
// input_curves in the fallback phase - presumably the same numbering; confirm.
vector<int> range_search(const vector<HashTable> &hashtables, const vector<Curve> &centroids, int dim, int L, int k, double delta) {
115+
vector<int> assignment((int)input_curves.size(), -1);
116+
vector<set<Curve> > R_closest_curves((int)centroids.size());
117+
vector<bool> grid_curves_found((int)centroids.size(), false), visited((int)input_curves.size(), false);
118+
double minim = -1;
128119

129-
for (int i = 0; i < (int)clusters.size(); ++i) {
130-
clusters[i].clear();
120+
// Smallest pairwise distance between centroids (each centroid is also printed).
for (int i = 0; i < (int)centroids.size(); ++i) {
121+
centroids[i].print_curve();
122+
123+
for (int j = i + 1; j < (int)centroids.size(); ++j) {
124+
double dist = compute_distance(centroids[i], centroids[j], "DFT");
125+
126+
if (minim == -1 || dist < minim) {
127+
minim = dist;
128+
}
129+
}
131130
}
132131

133-
for (int i = 0; i < (int)input_curves.size(); ++i) {
134-
min_dist = -1;
132+
// Initial radius; minim is still -1 here when there are fewer than two centroids.
double R = minim / 2.0;
133+
cout << "R = " << R << endl;
134+
135+
while (1) {
136+
// NOTE(review): general_search is expected to fill R_closest_curves[i] with the curves found
// within radius R of centroid i - confirm against its definition.
general_search(hashtables, L, delta, k, R, "classic", "DFT", R_closest_curves, centroids, grid_curves_found, visited);
137+
bool found = false;
138+
139+
for (int i = 0; i < (int)R_closest_curves.size(); ++i) {
140+
for (set<Curve>::const_iterator itr = R_closest_curves[i].begin(); itr != R_closest_curves[i].end(); ++itr) {
141+
int id = itr->get_int_id();
142+
143+
itr->print_curve();
144+
145+
if (assignment[id] == -1) {
146+
assignment[id] = i;
147+
found = true;
148+
} else {
149+
// Keep the distances as double: storing them in int truncated the fractional part, so two
// centroids whose distances shared an integer part were wrongly treated as equally close.
double dist_1 = compute_distance(*itr, centroids[i], "DFT");
150+
double dist_2 = compute_distance(*itr, centroids[assignment[id]], "DFT");
151+
152+
if (dist_1 < dist_2) {
153+
assignment[id] = i;
154+
found = true;
155+
}
156+
}
157+
}
158+
}
135159

136-
for (int j = 0; j < (int)centroids.size(); ++j) {
137-
dist = discrete_frechet_distance(input_curves[i], *centroids[j]);
160+
// A round that changed nothing ends the range-search phase.
if (!found) {
161+
break;
162+
}
138163

139-
if (min_dist == -1 || dist < min_dist) {
140-
min_dist = dist;
141-
p_centr = j;
164+
R *= 2;
165+
}
166+
167+
// fallback: curves that no range-search round assigned get their nearest centroid by direct comparison
168+
for (int i = 0; i < (int)assignment.size(); ++i) {
169+
if (assignment[i] == -1) {
170+
double minim = -1;
171+
172+
for (int j = 0; j < (int)centroids.size(); ++j) {
173+
double dist = compute_distance(input_curves[i], centroids[j], "DFT");
174+
175+
if (minim == -1 || dist < minim) {
176+
minim = dist;
177+
assignment[i] = j;
178+
}
142179
}
143180
}
144-
145-
clusters[p_centr].push_back(i);
146-
value += min_dist;
147181
}
148-
149-
return value;
182+
183+
return assignment;
150184
}
151185

152-
double swap_update_centroid(int old_centr, int new_centr, const vector<int> &assign, const vector<double> &close_dist, const vector<double> &close_dist_sec, char *metric) {
186+
double swap_update_centroid(const vector<const Curve*> &centroids, int old_centr, int new_centr, const vector<int> &assign, const vector<double> &close_dist, const vector<double> &close_dist_sec) {
153187
double value = 0;
154188

155189
for (int i = 0; i < (int)assign.size(); ++i) {
156-
if ((i != old_centr && (assign[i] == i)) || i == new_centr) {
190+
if ((i != old_centr && (centroids[assign[i]]->get_int_id() == i)) || i == new_centr) {
157191
continue;
158192
}
159193

160-
double dist = compute_distance(i, new_centr, metric);
194+
double dist = compute_distance(input_curves[i], input_curves[new_centr], "DFT");
161195

162-
if (assign[i] != old_centr) {
196+
if (centroids[assign[i]]->get_int_id() != old_centr) {
163197
value += min(dist, close_dist[i]);
164198
} else {
165199
value += min(dist, close_dist_sec[i]);
@@ -169,25 +203,45 @@ double swap_update_centroid(int old_centr, int new_centr, const vector<int> &ass
169203
return value;
170204
}
171205

172-
bool PAM_update(vector<int> &centroids, const vector<int> &assign, const vector<double> &close_dist, const vector<double> &close_dist_sec, double value, const vector<int> &cluster, int p_clust, char *metric) {
206+
bool PAM_update(vector<const Curve*> &centroids, double value, const vector<vector<int> > &clusters) {
207+
vector<double> close_dist((int)input_curves.size(), -1), close_dist_sec((int)input_curves.size(), -1);
208+
vector<int> assign((int)input_curves.size());
209+
173210
double min_value = value;
174-
int new_cent = -1;
211+
int p_new_cent, p_clust;
175212

176-
for (int i = 0; i < (int)cluster.size(); ++i) {
177-
if (cluster[i] == centroids[p_clust]) {
178-
continue;
213+
for (int i = 0; i < (int)input_curves.size(); ++i) {
214+
for (int j = 0; j < (int)centroids.size(); ++j) {
215+
double dist = compute_distance(input_curves[i], *centroids[j], "DFT");
216+
217+
if (close_dist[i] == -1 || dist < close_dist[i]) {
218+
close_dist_sec[i] = close_dist[i];
219+
close_dist[i] = dist;
220+
assign[i] = j;
221+
} else if (close_dist_sec[i] == -1 || close_dist_sec[i] < dist) {
222+
close_dist_sec[i] = dist;
223+
}
179224
}
180-
181-
double new_value = swap_update_centroid(centroids[p_clust], cluster[i], assign, close_dist, close_dist_sec, metric);
225+
}
226+
227+
for (int i = 0; i < (int)clusters.size(); ++i) {
228+
for (int j = 0; j < (int)clusters[i].size(); ++j) {
229+
if (clusters[i][j] == centroids[i]->get_int_id()) {
230+
continue;
231+
}
232+
233+
double new_value = swap_update_centroid(centroids, centroids[i]->get_int_id(), clusters[i][j], assign, close_dist, close_dist_sec);
182234

183-
if (new_value < min_value) {
184-
min_value = new_value;
185-
new_cent = cluster[i];
235+
if (new_value < min_value) {
236+
min_value = new_value;
237+
p_new_cent = clusters[i][j];
238+
p_clust = i;
239+
}
186240
}
187241
}
188-
242+
189243
if (value > min_value) {
190-
centroids[p_clust] = new_cent;
244+
centroids[p_clust] = &input_curves[p_new_cent];
191245
return true;
192246
}
193247

@@ -202,34 +256,31 @@ bool mean_frechet_update(vector<const Curve*> &centroids, const vector<vector<in
202256
const Curve *mean_curve = tree.get_mean();
203257

204258
if (!centroids[i]->equal_curves(*mean_curve)) {
205-
centroids[i] = mean_curve;
259+
centroids[i] = mean_curve;
206260
check = true;
207261
}
208262
}
209263

210264
return check;
211265
}
212266

213-
void clustering(char *metric) {
267+
void clustering(const vector<HashTable> &hashtables, int L, int k, double delta) {
214268
vector<const Curve*> centroids;
215-
vector<int> assignment(input_curves.size()), centroids_ind;
269+
vector<int> assignment(input_curves.size());
216270
vector<double> close_dist(input_curves.size()), close_dist_sec(input_curves.size());
217271
bool check;
218272
double value;
219273
int num_of_clusters = 2;
220-
221-
k_means_pp(centroids_ind, centroids, input_curves.size(), num_of_clusters, metric);
274+
275+
k_means_pp(centroids, input_curves.size(), num_of_clusters, "DFT");
222276
cout << "initialization ended" << endl;
223277

224278
vector<vector<int> > clusters(num_of_clusters);
225279

226280
do {
227-
for (int i = 0; i < num_of_clusters; ++i) {
228-
//value = loyd_assignment(centroids_ind, assignment, close_dist, close_dist_sec, clusters);
229-
value = loyd_assignment(centroids, clusters);
230-
//check = PAM_update(centroids_ind, assignment, close_dist, close_dist_sec, value, clusters[i], i, metric);
231-
check = mean_frechet_update(centroids, clusters);
232-
}
281+
value = loyd_assignment(centroids, clusters);
282+
//check = PAM_update(centroids, value, clusters);
283+
check = mean_frechet_update(centroids, clusters);
233284
} while(check);
234285

235286
double min_s = -1, max_s = -1;

cluster.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,17 @@
22
#define CLUSTER_H
33

44
#include "curve.h"
5+
#include "hashtable.h"
56
#include <vector>
67

78
using namespace std;
89

910
vector<int> k_random_selection(int, int);
10-
void k_means_pp(vector<int>&, vector<const Curve*>&, int, int, const char*);
11-
double loyd_assignment(const vector<int>&, vector<int>&, vector<double>&, vector<double>&, vector<vector<int> >&, char*);
11+
void k_means_pp(vector<const Curve*>&, int, int, const char*);
1212
double loyd_assignment(const vector<const Curve*>&, vector<vector<int> >&);
13-
bool PAM_update(vector<int>&, const vector<int>&, const vector<double>&, const vector<double>&, double, const vector<int>&, int, char*);
13+
vector<int> range_search(const vector<HashTable> &, const vector<Curve>&, int, int, int, double);
14+
// Parameters mirror the definition in cluster.cpp: the centroids (as Curve pointers), the current cost value, and the clusters (one list of curve indices per centroid).
bool PAM_update(vector<const Curve*>&, double, const vector<vector<int> >&);
1415
bool mean_frechet_update(vector<const Curve*>&, const vector<vector<int> >&);
15-
void clustering(char*);
16+
void clustering(const vector<HashTable> &hashtables, int, int, double);
1617

1718
#endif

curve.cpp

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,27 +4,35 @@ vector<Curve> input_curves;
44

55
// Default constructor: sets the string id to "" and the integer id to -1.
// NOTE(review): dim is not assigned here, unlike in the other two constructors - confirm it gets a value elsewhere before get_dimension() is used.
Curve::Curve() {
66
this->id = "";
7+
this->int_id = -1;
78
}
89

9-
Curve::Curve(string id, int dim) {
10+
// Stores the given string id, integer id and dimension; no points are added by this constructor.
Curve::Curve(string id, int int_id, int dim) {
1011
this->id = id;
12+
this->int_id = int_id;
1113
this->dim = dim;
1214
}
1315

14-
Curve::Curve(string id, int dim, const vector<vector<double> > &curve) {
16+
// Stores the given string id, integer id and dimension, and copies `curve` (a list of points,
// each point being a vector of coordinates) as the points of this curve.
Curve::Curve(string id, int int_id, int dim,const vector<vector<double> > &curve) {
1517
this->id = id;
18+
this->int_id = int_id;
1619
this->dim = dim;
1720
this->curve = curve;
1821
}
1922

20-
void Curve::set_id(string id) {
23+
// Overwrites both identifiers of the curve: the string id and the integer id.
void Curve::set_id(string id, int int_id) {
2124
this->id = id;
25+
this->int_id = int_id;
2226
}
2327

2428
// Appends a copy of `point` (one vector of coordinates) to the end of the curve.
void Curve::insert_point(const vector<double> &point) {
2529
curve.push_back(point);
2630
}
2731

32+
// Removes every point from the curve; id, int_id and dim are left as they are.
void Curve::clear_curve() {
33+
curve.clear();
34+
}
35+
2836
// Returns the stored dimension value (dim).
int Curve::get_dimension() const {
2937
return dim;
3038
}
@@ -37,6 +45,10 @@ string Curve::get_id() const {
3745
return id;
3846
}
3947

48+
// Returns the integer id of the curve (the default constructor sets it to -1).
int Curve::get_int_id() const {
49+
return int_id;
50+
}
51+
4052
// Returns coordinate number `coord` of point number `pnt`.
// Both indices go straight to operator[], so no range check is performed here.
double Curve::get_coord_point(int coord, int pnt) const {
4153
return curve[pnt][coord];
4254
}
@@ -66,7 +78,8 @@ bool Curve::is_empty() const {
6678
}
6779

6880
void Curve::print_curve() const {
69-
cout << "Id: " << id << endl;
81+
cout << "Id: " << id << "\n";
82+
cout << "int_Id: " << int_id << "\n";
7083

7184
for (int i = 0; i < (int)curve.size(); ++i) {
7285
cout << "Point " << i << ": ";
@@ -82,11 +95,8 @@ void Curve::print_curve() const {
8295
}
8396

8497
// Appends every point of new_curve, in order, to the end of this curve.
void Curve::append_curve(const Curve &new_curve) {
85-
vector<double> temp;
86-
8798
for (int i = 0; i < new_curve.get_length(); ++i) {
88-
temp = new_curve.get_point(i);
89-
curve.push_back(temp);
99+
curve.push_back(new_curve.get_point(i));
90100
}
91101
}
92102

0 commit comments

Comments
 (0)