Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Synthetic #82

Merged
merged 43 commits into from
Mar 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
ace2403
Modify: turn Grid Width from int to double
GabrielWuNR Dec 18, 2021
c167607
add parameter of DStream in utils
GabrielWuNR Dec 18, 2021
f0e5657
benchmark modify
GabrielWuNR Dec 19, 2021
9deb3bb
benchmark modify --add bash
GabrielWuNR Dec 19, 2021
d4161bb
benchmark modify --add bash
GabrielWuNR Jan 3, 2022
ce91a9f
just for the accuracy test
GabrielWuNR Jan 4, 2022
ba07664
benchmark modify --add bash
GabrielWuNR Jan 3, 2022
6bcce6b
benchmark modify --add bash
GabrielWuNR Jan 3, 2022
d6bbaca
benchmark modify --add bash
GabrielWuNR Jan 4, 2022
1b66f40
Comment out SESAME_DEBUG and SESAME_INFO
GabrielWuNR Jan 4, 2022
5048a96
experiment code all complete
GabrielWuNR Jan 6, 2022
c59eb0e
forget local dataset
GabrielWuNR Jan 6, 2022
c5acec2
forgot this one
GabrielWuNR Jan 7, 2022
1c91dd8
timer modify for CDF
GabrielWuNR Jan 11, 2022
2925708
timer modify for CDF
GabrielWuNR Jan 11, 2022
8121f1f
timer reduce
GabrielWuNR Jan 12, 2022
cea3120
fix some double issue
GabrielWuNR Jan 12, 2022
114df48
1.26
tuidan Jan 26, 2022
dadf204
1.27
tuidan Jan 27, 2022
9049bdb
fix window issue of DBStream and DensSTREAM
GabrielWuNR Jan 28, 2022
52d5299
1.28
tuidan Jan 28, 2022
434a8b4
All bugs and low performance issues has been eliminated, Figure deadl…
GabrielWuNR Jan 30, 2022
2a6e103
All bugs and low performance issues has been eliminated, Figure deadl…
GabrielWuNR Jan 30, 2022
318fc14
All bugs and low performance issues has been eliminated
GabrielWuNR Jan 31, 2022
224988b
benchmark modify for parameter in DStream
GabrielWuNR Jan 31, 2022
446b070
benchmark modify for parameter in DStream
GabrielWuNR Jan 31, 2022
92f777b
add cumulative time
GabrielWuNR Feb 1, 2022
29ccdc7
2.3
tuidan Feb 3, 2022
ee14c73
2.6
tuidan Feb 6, 2022
c008a09
2.6CMM
tuidan Feb 6, 2022
3dd91c9
2.7
tuidan Feb 7, 2022
9b6ba2c
2.7
tuidan Feb 7, 2022
8f9bdcd
2.7 diamond
tuidan Feb 7, 2022
382967d
2.8
tuidan Feb 8, 2022
feb85e0
2.8
tuidan Feb 8, 2022
b391cdc
timeter
tuidan Feb 8, 2022
e8084dc
2.8
GabrielWuNR Feb 8, 2022
8d794c7
timeter
tuidan Feb 8, 2022
10f4b29
timeter
tuidan Feb 8, 2022
30a0455
2.8
GabrielWuNR Feb 8, 2022
49b3794
3.3 Commit
tuidan Mar 3, 2022
d8d0c33
Update Benchmark.cpp
tuidan Mar 3, 2022
7a4eda6
Merge branch 'main' into Synthetic
tuidan Mar 3, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@
/benchmark/build/
/benchmark/release/
/cmake-*
/debug
150 changes: 0 additions & 150 deletions benchmark/datasets/Mock.txt

This file was deleted.

29 changes: 18 additions & 11 deletions benchmark/src/Benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,27 @@ int main(int argc, char **argv) {
//Parse parameters.
param_t cmd_params;
BenchmarkUtils::defaultParam(cmd_params);
cmd_params.pointNumber = 15120;
cmd_params.pointNumber = 3000;
cmd_params.seed = 10;
cmd_params.clusterNumber = 10;
cmd_params.clusterNumber = 7;
cmd_params.dimension = 54;
cmd_params.coresetSize = 100;
cmd_params.lastArrivingNum = 60;
cmd_params.timeWindow = 6;
cmd_params.timeInterval = 4;
cmd_params.onlineClusterNumber = 15;
cmd_params.radiusFactor = 2;
cmd_params.initBuffer = 500;
cmd_params.offlineTimeWindow = 2;
cmd_params.coresetSize = 600;
cmd_params.lastArrivingNum = 5;
cmd_params.timeWindow = 300;
cmd_params.timeInterval = 8;
cmd_params.onlineClusterNumber = 150;
cmd_params.radiusFactor = 20;
cmd_params.initBuffer = 20;
cmd_params.offlineTimeWindow = 0;
cmd_params.maxInternalNodes = 40;
cmd_params.maxLeafNodes = 20;
cmd_params.thresholdDistance = 10;
cmd_params.GTClusterNumber = 7;
cmd_params.timeDecay = false;


cmd_params.outputPath = "results.txt";
cmd_params.algoType = SESAME::CluStreamType;
cmd_params.algoType = SESAME::StreamKMeansType;
BenchmarkUtils::parseArgs(argc, argv, cmd_params);
std::vector<SESAME::PointPtr> input;
std::vector<SESAME::PointPtr> results;
Expand Down
1 change: 1 addition & 0 deletions include/Algorithm/OfflineClustering/KMeans.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class KMeans : public SESAME::OfflineClustering {
void produceResult(std::vector<std::vector<PointPtr>> &groups, DataSinkPtr sinkPtr);
void runKMeans(int numberOfCenters,
int numberOfInput,
std::vector<PointPtr> &centers,
std::vector<PointPtr> &input,
std::vector<std::vector<PointPtr>> &oldGroups,
std::vector<std::vector<PointPtr>> &newGroups,
Expand Down
4 changes: 1 addition & 3 deletions include/Evaluation/Evaluation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@ namespace SESAME {

class Evaluation {
public:
static void runEvaluation(//int numberOfPoints,
// int numberOfCenters,
int dimension,
static void runEvaluation(int dimension,int GTClusterNumber, bool decay,
const std::vector<PointPtr> &inputs,
const std::vector<PointPtr> &results);

Expand Down
4 changes: 2 additions & 2 deletions include/Evaluation/Purity.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ class Purity {
private:
static void pointToGroup(const std::vector<SESAME::PointPtr> &input,
std::vector<std::vector<PointPtr>> &group, int number);
static int calculateBelongsFromTwo(std::vector<SESAME::PointPtr> &groupA,
static double calculateBelongsFromTwo(std::vector<SESAME::PointPtr> &groupA,
std::vector<SESAME::PointPtr> &groupB);
static double getMaxBelongs(std::vector<SESAME::PointPtr> &singleSample,
std::vector<std::vector<PointPtr>> &GT);
public:
static double purityCost(const std::vector<PointPtr> &input,
const std::vector<PointPtr> &result,
int dimension);
int dimension, int GTclusterNumber, bool decay);
};

}
Expand Down
2 changes: 2 additions & 0 deletions include/Utils/BenchmarkUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

struct param_t {
int pointNumber;
bool timeDecay;
int clusterNumber;
int dimension;
int coresetSize;
Expand All @@ -36,6 +37,7 @@ struct param_t {
int offlineTimeWindow;
int maxInternalNodes;
int maxLeafNodes;
int GTClusterNumber;
double thresholdDistance;
//used in DenStream(unique)
unsigned int minPoints;
Expand Down
2 changes: 1 addition & 1 deletion include/Utils/UtilityFunctions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#define CMM_KNN 10
#define CMM_A 0.998
#define CMM_LAMDA 1
#define CMM_THRESHOLD 1000
#define CMM_THRESHOLD 542

/*
Determines when Lloyd terminates (should be between 0 and 1)
Expand Down
28 changes: 18 additions & 10 deletions src/Algorithm/Birch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ void SESAME::Birch::runOfflineClustering(DataSinkPtr sinkPtr) {
sinkPtr->put(centroid->copy());
}
timerMeter.printTime(false,false,false,false);
SESAME_DEBUG( "The size of the centroid is :" << sinkPtr->getResults().size());
//SESAME_DEBUG( "The size of the centroid is :" << sinkPtr->getResults().size());

// std::vector<std::vector<PointPtr>> oldGroups, newGroups;
// this->kmeans->runKMeans((int)middleCentroids.size() / 2, (int)middleCentroids.size(),
// middleCentroids,oldGroups,newGroups, true);
Expand Down Expand Up @@ -128,12 +129,14 @@ void SESAME::Birch::selectChild(vector<SESAME::NodePtr> &children, SESAME::Point

// Compute the Euclidean distance between `point` and `centroid`.
//
// The squared per-feature differences are summed over
// point->getDimension() features and the square root of that sum is
// returned. The whole computation is bracketed by
// timerMeter.dataInsertAccMeasure() / timerMeter.dataInsertEndMeasure().
//
// NOTE(review): `centroid` is read at the same indices as `point`, so it is
// assumed to hold at least point->getDimension() features — confirm at callers.
double SESAME::Birch::calculateRadius(SESAME::PointPtr &point, SESAME::PointPtr &centroid) {
  timerMeter.dataInsertAccMeasure();

  double squaredSum = 0;
  int dim = 0;
  while (dim < point->getDimension()) {
    const auto delta = centroid->getFeatureItem(dim) - point->getFeatureItem(dim);
    squaredSum += pow(delta, 2);
    ++dim;
  }
  const double distance = sqrt(squaredSum);

  timerMeter.dataInsertEndMeasure();
  return distance;
}

Expand Down Expand Up @@ -213,22 +216,27 @@ void SESAME::Birch::forwardInsert(SESAME::PointPtr point){
if(curNode->getIsLeaf()) {
timerMeter.clusterUpdateAccMeasure();
CFPtr curCF = curNode->getCF();
timerMeter.dataInsertAccMeasure();
if(curCF->getN() == 0) {
initializeCF(curCF, point->getDimension());
}
PointPtr centroid = make_shared<Point>();
calculateCentroid(curCF, centroid);
timerMeter.dataInsertEndMeasure();
if(calculateRadius(point, centroid) <= this->cfTree->getT()) { // concept drift detection
// whether the new radius is lower than threshold T
timerMeter.dataInsertAccMeasure();
updateNLS(curNode, point, true);
timerMeter.clusterUpdateEndMeasure();
timerMeter.dataInsertEndMeasure();

// means this point could get included in this cluster
SESAME_DEBUG("No concept drift occurs(t <= T), insert tha point into the leaf node...");
//SESAME_DEBUG("No concept drift occurs(t <= T), insert tha point into the leaf node...");
break;
// Normally insert the data point into the tree leafNode without concept drift
} else {
// concept drift adaption
SESAME_DEBUG("Concept drift occurs(t > T), the current leaf node capacity reaches the threshold T");
// SESAME_DEBUG("Concept drift occurs(t > T), the current leaf node capacity reaches the threshold T");
timerMeter.clusterUpdateAccMeasure();
backwardEvolution(curNode, point);
timerMeter.clusterUpdateEndMeasure();
break;
Expand All @@ -246,7 +254,7 @@ void SESAME::Birch::forwardInsert(SESAME::PointPtr point){
// concept drift adaption
void SESAME::Birch::backwardEvolution(SESAME::NodePtr &curNode, SESAME::PointPtr &point) {
if(curNode->getParents().empty()) { // means current node is root node
SESAME_DEBUG("l <= L, create a new leaf node and insert the point into it(root change)");
//SESAME_DEBUG("l <= L, create a new leaf node and insert the point into it(root change)");
NodePtr newRoot = make_shared<CFNode>();
newRoot->setIsLeaf(false);
newRoot->setChild(curNode);
Expand Down Expand Up @@ -278,13 +286,13 @@ void SESAME::Birch::backwardEvolution(SESAME::NodePtr &curNode, SESAME::PointPtr
parent->setChild(newNode);
if(parent->getChildren().size() <= this->cfTree->getL()){
// whether the number of CFs(clusters) in the current leaf node is lower than threshold L
SESAME_DEBUG("l <= L, create a new leaf node and insert the point into it");
//SESAME_DEBUG("l <= L, create a new leaf node and insert the point into it");

// update the parent node
updateNLS(parent, point, true);
} else{
SESAME_DEBUG("l > L, parent node of the current leaf node capacity reaches the threshold L");
SESAME_DEBUG("split a new parent node from the old one ");
// SESAME_DEBUG("l > L, parent node of the current leaf node capacity reaches the threshold L");
//SESAME_DEBUG("split a new parent node from the old one ");
bool CurNodeIsLeaf = true;
while(true) {
NodePtr parParent;
Expand Down Expand Up @@ -370,10 +378,10 @@ void SESAME::Birch::backwardEvolution(SESAME::NodePtr &curNode, SESAME::PointPtr
}

if(parParent->getChildren().size() <= this->cfTree->getB()) {
SESAME_DEBUG("b < B, remove the old node and insert the new nodeA and nodeB into the parent node");
//SESAME_DEBUG("b < B, remove the old node and insert the new nodeA and nodeB into the parent node");
break;
}else {
SESAME_DEBUG("b >= B, parent node of the current interior node capacity reaches the threshold B");
//SESAME_DEBUG("b >= B, parent node of the current interior node capacity reaches the threshold B");
curNode = curNode->getParents()[0];
parent = parParent;
CurNodeIsLeaf = false;
Expand Down
Loading