-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcuLazyNN_RF.cu
129 lines (90 loc) · 3.2 KB
/
cuLazyNN_RF.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#include "cuLazyNN_RF.cuh"
#include <iostream>
// tcpp
#include "tcpp/tree.hpp"
#include "tcpp/rf.hpp"
// Feeds the first K entries of `k_nearest` into `rf` as training documents.
//
// For each neighbour a new DTDocument is created whose id is the neighbour's
// doc_id and whose class is the `y` field of the training sample with that
// index, both converted with Utils::toString. Every (term id, value) pair of
// that sample's feature map is then inserted into the document, and the
// document is handed to rf->add_document().
//
//   rf        - receiver of the generated documents.
//   training  - dataset indexed by the neighbours' doc_id.
//   k_nearest - array that must hold at least K entries.
//   K         - number of entries of k_nearest to consume.
//
// NOTE(review): the documents are allocated with `new` and never deleted in
// this function; presumably RF takes ownership inside add_document() - confirm.
inline void prepareTrainSamples(RF *rf, Dataset &training, cuSimilarity *k_nearest, unsigned int K)
{
    typedef std::map<unsigned int, float> FeatureMap;

    for (unsigned int n = 0; n != K; ++n) {
        unsigned int sample_idx = k_nearest[n].doc_id;
        FeatureMap &sample_terms = training.getSamples()[sample_idx].features;

        DTDocument *neighbour_doc = new DTDocument();
        neighbour_doc->set_id(Utils::toString(sample_idx));
        neighbour_doc->set_class(Utils::toString(training.getSamples()[sample_idx].y));

        for (FeatureMap::iterator entry = sample_terms.begin(); entry != sample_terms.end(); ++entry) {
            unsigned int term = entry->first;
            float value = entry->second;
            // The stored value is inserted unscaled. The original carried this
            // scaling as a disabled trailing comment:
            //   * log((double)training.size() / float(max(1, training.getIdf(term_id))))
            neighbour_doc->insert_term(term, value);
        }

        rf->add_document(neighbour_doc);
    }
}
// Default constructor. The body is intentionally empty and no member
// initialisers are given, so members are default-initialised.
cuLazyNN_RF::cuLazyNN_RF()
{
}
// Destructor. The body is intentionally empty; nothing is released explicitly
// here.
cuLazyNN_RF::~cuLazyNN_RF()
{
}
// Constructs the classifier from a dataset.
//
//   data   - forwarded to the cuKNN member's constructor and then assigned to
//            the `training` member.
//   n_gpus - forwarded to the cuKNN member's constructor.
cuLazyNN_RF::cuLazyNN_RF(Dataset &data, int n_gpus)
    : cuKNN(data, n_gpus)
{
    training = data;
}
// Replaces the training data.
//
// `data` is first assigned to the `training` member and then passed to
// cuKNN.train(). The order is kept as written because cuKNN.train() receives
// a non-const reference.
void cuLazyNN_RF::train(Dataset &data)
{
    training = data;
    cuKNN.train(data);
}
int cuLazyNN_RF::classify(const std::map<unsigned int, float> &test_features, int K){
DTDocument * doc = new DTDocument();
doc->set_id("0");doc->set_class("1");
Scores<double> similarities(doc->get_id(), doc->get_class());
std::map<unsigned int, float>::const_iterator it;
for(it = test_features.begin(); it != test_features.end(); ++it){
unsigned int term_id = it->first;
float term_count = it->second;
doc->insert_term(term_id, term_count);
}
cuSimilarity *k_nearest = cuKNN.getKNearestNeighbors(test_features, K);
//printf("cuLazyNN_RF\n");
RF * rf = new RF(0, 0.03, 200);
prepareTrainSamples(rf, training, k_nearest, K);
rf->build();
similarities = rf->classify(doc);
delete rf;
delete doc;
delete[] k_nearest;
return atoi(similarities.top().class_name.c_str());
//return cuKNN.getMajorityVote(k_nearest, K);
}
// Classifies every sample of `test` and returns the predicted labels in
// sample order.
//
// The cuKNN member is asked once for the K nearest neighbours of all test
// samples. Then, for each returned neighbour array i:
//   - a query DTDocument (id "0", class "1") is filled with the features of
//     test.getSamples()[i];
//   - a fresh RF is filled from that neighbour array via
//     prepareTrainSamples(), built, and used to classify the query;
//   - the class_name of the top score, converted with atoi(), is appended to
//     the result;
//   - a progress line (percentage done and running fraction of predictions
//     equal to test.getSamples()[i].y) is written to std::cerr.
//
//   test - samples to classify; indexed in parallel with the neighbour arrays.
//   K    - number of neighbours requested from cuKNN per sample.
//
// Changes relative to the previous version: the write-only `wrong_cosine`
// counter was removed, the loop index is now the vector's unsigned size_type
// (no signed/unsigned comparison), and the result vector is reserved up front.
//
// NOTE(review): each neighbour array is released with free() here, while the
// single-sample overload uses delete[]. Which one matches the allocation
// inside getKNearestNeighbors() is not visible from this file - verify.
// NOTE(review): the RF is constructed with (0, 0.03, 100) here and with
// (0, 0.03, 200) in the single-sample overload - confirm this is intended.
// NOTE(review): assumes getKNearestNeighbors() returns one array per test
// sample, i.e. idxs.size() <= test.getSamples().size() - confirm.
std::vector<int> cuLazyNN_RF::classify(Dataset &test, int K)
{
    typedef std::vector<cuSimilarity*>::size_type Index;

    std::vector<cuSimilarity*> idxs = cuKNN.getKNearestNeighbors(test, K);

    std::vector<int> pred;
    pred.reserve(idxs.size());

    int correct_cosine = 0;
    for (Index i = 0; i < idxs.size(); ++i)
    {
        DTDocument * doc = new DTDocument();
        doc->set_id("0");
        doc->set_class("1");
        Scores<double> similarities(doc->get_id(), doc->get_class());

        std::map<unsigned int, float> &test_features = test.getSamples()[i].features;
        std::map<unsigned int, float>::const_iterator it;
        for (it = test_features.begin(); it != test_features.end(); ++it) {
            unsigned int term_id = it->first;
            float term_count = it->second;
            doc->insert_term(term_id, term_count);
        }

        RF * rf = new RF(0, 0.03, 100);
        prepareTrainSamples(rf, training, idxs[i], K);
        rf->build();
        similarities = rf->classify(doc);

        delete rf;
        delete doc;
        free(idxs[i]);

        pred.push_back(atoi(similarities.top().class_name.c_str()));
        if (pred.back() == test.getSamples()[i].y) {
            correct_cosine++;
        }

        // Progress report, rewritten in place on the same terminal line ("\r").
        // The formatting calls stay inside the loop so that the first
        // iteration behaves exactly as before.
        std::cerr.precision(4);
        std::cerr.setf(std::ios::fixed);
        std::cerr << "\r" << double(i+1)/test.getSamples().size() * 100 << "%" << " - " << double(correct_cosine) / (i+1);
    }
    return pred;
}