From 7e335ddb5417cfa19e2b9349204ea938b2ceb3e1 Mon Sep 17 00:00:00 2001 From: s1926539 Date: Tue, 31 Aug 2021 00:56:10 +0800 Subject: [PATCH 01/21] Finish Coding half of DBStream --- include/Algorithm/DBStream.hpp | 56 +++++++++ .../DataStructure/DataStructureFactory.hpp | 8 +- .../Algorithm/DataStructure/MicroCluster.hpp | 22 ++-- .../DataStructure/WeightedAdjacencyList.hpp | 44 +++++++ include/Algorithm/DenStream.hpp | 3 +- include/Utils/BenchmarkUtils.hpp | 5 + src/Algorithm/AlgorithmFactory.cpp | 5 + src/Algorithm/CMakeLists.txt | 1 + src/Algorithm/DBStream.cpp | 113 ++++++++++++++++++ src/Algorithm/DataStructure/CMakeLists.txt | 1 + .../DataStructure/DataStructureFactory.cpp | 19 ++- src/Algorithm/DataStructure/MicroCluster.cpp | 49 +++++++- src/Algorithm/DataStructure/Snapshot.cpp | 6 +- .../DataStructure/WeightedAdjacencyList.cpp | 35 ++++++ src/Algorithm/DenStream.cpp | 12 +- test/CMakeLists.txt | 6 +- 16 files changed, 359 insertions(+), 26 deletions(-) create mode 100644 include/Algorithm/DBStream.hpp create mode 100644 include/Algorithm/DataStructure/WeightedAdjacencyList.hpp create mode 100644 src/Algorithm/DBStream.cpp create mode 100644 src/Algorithm/DataStructure/WeightedAdjacencyList.cpp diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp new file mode 100644 index 00000000..85f65211 --- /dev/null +++ b/include/Algorithm/DBStream.hpp @@ -0,0 +1,56 @@ +// +// Created by 1124a on 2021/8/30. +// + +#ifndef SESAME_INCLUDE_ALGORITHM_DBSTREAM_HPP_ +#define SESAME_INCLUDE_ALGORITHM_DBSTREAM_HPP_ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace SESAME { +class DBStreamParams : public AlgorithmParameters { + public: + double radius; + double lambda; + int cleanUpInterval;//Tgap + double weightMin;//minimum weight + double alpha;//α, intersection factor + double base;//base of decay function +}; +class DBStream : public Algorithm + { + public: + DBStreamParams dbStreamParams; + DampedWindowPtr dampedWindow; + unordered_set microClusters; + SESAME::WeightedAdjacencyList weightedAdjacencyList; + double weakEntry;//W_weak, weak entries + double aWeakEntry; + clock_t startTime; + clock_t pointArrivingTime; + int microClusterIndex; + //TODO Need to implement weighted a weighted adjacency list S + DBStream(param_t &cmd_params); + ~DBStream(); + void Initilize() override; + void runOnlineClustering(PointPtr input) override; + void runOfflineClustering(DataSinkPtr sinkPtr) override; + private: + bool isInitial = false; + // vector initialBuffer; + void update(PointPtr dataPoint); + void cleanUp(clock_t time); + bool checkMove( std::vector microClusters); + std::vector findFixedRadiusNN(PointPtr dataPoint, double decayFactor); + }; + +} +#endif //SESAME_INCLUDE_ALGORITHM_DBSTREAM_HPP_ diff --git a/include/Algorithm/DataStructure/DataStructureFactory.hpp b/include/Algorithm/DataStructure/DataStructureFactory.hpp index 6ffea88c..21567023 100644 --- a/include/Algorithm/DataStructure/DataStructureFactory.hpp +++ b/include/Algorithm/DataStructure/DataStructureFactory.hpp @@ -13,7 +13,7 @@ #include #include #include - +#include namespace SESAME { class DataStructureFactory { @@ -28,12 +28,16 @@ class DataStructureFactory { static CoresetTreePtr createCoresetTree(); static void clearCoresetTree(CoresetTreePtr tree); static MicroClusterPtr createMicroCluster(int dimension, int id); + static MicroClusterPtr createMicroCluster(int dimension, int id,PointPtr dataPoint,double radius); static void clearMicroCluster(MicroClusterPtr microCluster); static SnapshotPtr createSnapshot(MicroClusters & otherMicroClusters,int elapsedTime); static void clearSnapshot(SnapshotPtr snapshot); static CFTreePtr createCFTree(); static NodePtr createNode(); - + static MicroClusterPairPtr createMicroClusterPair(MicroClusterPtr microCluster1,MicroClusterPtr microCluster2); + static void clearMicroClusterPair(MicroClusterPairPtr microClusterPair); + static AdjustedWeightPtr createAdjustedWeight(double weight, clock_t pointTime); + static void clearAdjustedWeight(AdjustedWeightPtr adjustedWeight); }; } #endif //SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_DATASTRUCTUREFACTORY_HPP_ diff --git a/include/Algorithm/DataStructure/MicroCluster.hpp b/include/Algorithm/DataStructure/MicroCluster.hpp index 330e3e67..2af59d63 100644 --- a/include/Algorithm/DataStructure/MicroCluster.hpp +++ b/include/Algorithm/DataStructure/MicroCluster.hpp @@ -32,23 +32,27 @@ class MicroCluster { int SST;//the sum of the squares of the time stamps Til... Tin double weight; //number of data point in the clusters int dimension; + double radius;//Used in DBStream + - //TODO Need to subtract Base class of CF vector when all cf-vector based-algorithms have been implemented //the parameters below is unique for DenStream clock_t createTime; clock_t lastUpdateTime; bool visited; - //TODO this may need to modify in the future (All algorithms used this, e.g.DenStream,CluStream,DenStream,DBStream,SWEM =.=) + //TODO 1. Need to subtract Base class of CF vector when all cf-vector based-algorithms have been implemented + // 2.this may need to modify in the future (All algorithms used this, e.g.DenStream,CluStream,DenStream,DBStream,SWEM =.=) + + MicroCluster(int dimension, int id); + MicroCluster(int dimension, int id,PointPtr dataPoint,double radius);//DBStream ~MicroCluster(); void init(PointPtr datapoint, int timestamp); - - void insert(PointPtr datapoint, int timestamp); - bool insert(PointPtr datapoint,double decayFactor,double epsilon);//Used in DenStream - + void insert(PointPtr datapoint, int timestamp);//Used in CluStream + bool insert(PointPtr datapoint,double decayFactor,double epsilon);// DenStream + void insert(PointPtr datapoint);//DBStream void merge(MicroClusterPtr other); - void substractClusterVector(MicroClusterPtr other); + void subtractClusterVector(MicroClusterPtr other); void updateId(MicroClusterPtr other); void resetID(int index); //Used in DenStream @@ -65,9 +69,13 @@ class MicroCluster { dataPoint getVarianceVector(); double calCentroidDistance(PointPtr datapoint); bool judgeMerge(MicroClusterPtr other); + double getDistance(PointPtr datapoint);//DBStream + double getDistance(MicroClusterPtr other);//DBStream + void move();//DBStream SESAME::MicroClusterPtr copy(); private: + double distance; static double inverseError(double x); }; } diff --git a/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp b/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp new file mode 100644 index 00000000..e7566e37 --- /dev/null +++ b/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp @@ -0,0 +1,44 @@ +// +// Created by 1124a on 2021/8/30. +// + +#ifndef SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_WEIGHTEDADJACENCYLIST_HPP_ +#define SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_WEIGHTEDADJACENCYLIST_HPP_ +#include +#include +#include +#include +#include +#include +#include +#include +namespace SESAME { +class MicroClusterPair; +typedef std::shared_ptr MicroClusterPairPtr; +class MicroClusterPair{ + public: + MicroClusterPtr microCluster1; + MicroClusterPtr microCluster2; + MicroClusterPair( MicroClusterPtr microCluster1,MicroClusterPtr microCluster2); + bool equal(MicroClusterPairPtr other); +}; + +class AdjustedWeight; +typedef std::shared_ptr AdjustedWeightPtr; +class AdjustedWeight{ + public: + double weight; + clock_t updateTime; + AdjustedWeight(double weight, clock_t pointTime); + void add(clock_t startTime,double decayValue); + double getCurrentWeight(double decayFactor); +}; + +typedef std::unordered_map WeightedAdjacencyList; +typedef std::pair DensityGraph; +//S in paper, represent Weighted Adjacency List + +} + + +#endif //SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_WEIGHTEDADJACENCYLIST_HPP_ diff --git a/include/Algorithm/DenStream.hpp b/include/Algorithm/DenStream.hpp index a5a633ee..caa70b5a 100644 --- a/include/Algorithm/DenStream.hpp +++ b/include/Algorithm/DenStream.hpp @@ -40,7 +40,7 @@ class DenStreamParams : public AlgorithmParameters { clock_t startTime; clock_t pointArrivingTime; long Tp; - int iterpoint=0;//TODO DELETE LATER + int iterPoint=0;//TODO DELETE LATER int pMicroClusterIndex; int oMicroClusterIndex; @@ -62,7 +62,6 @@ class DenStreamParams : public AlgorithmParameters { int mergeToMicroCluster(PointPtr dataPoint,std::vector microClusters ); static void microClusterToPoint(std::vector µClusters, vector &points); - //TODO overlap functions with Clustream, may need to remove to utils folder }; } #endif //SESAME_INCLUDE_ALGORITHM_DENSTREAM_HPP_ diff --git a/include/Utils/BenchmarkUtils.hpp b/include/Utils/BenchmarkUtils.hpp index b1ed3464..134b577c 100644 --- a/include/Utils/BenchmarkUtils.hpp +++ b/include/Utils/BenchmarkUtils.hpp @@ -43,6 +43,11 @@ struct param_t { double lambda; double mu; double beta; + //used in DBStream + double radius; + int cleanUpInterval; + double weightMin; + double alpha; std::string inputPath; std::string outputPath; std::string algoName; diff --git a/src/Algorithm/AlgorithmFactory.cpp b/src/Algorithm/AlgorithmFactory.cpp index 1027e04f..a6696602 100644 --- a/src/Algorithm/AlgorithmFactory.cpp +++ b/src/Algorithm/AlgorithmFactory.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -27,5 +28,9 @@ SESAME::AlgorithmPtr SESAME::AlgorithmFactory::create(param_t &cmd_params) { shared_ptr denStream = std::make_shared(cmd_params); return (SESAME::AlgorithmPtr) denStream; } + if (cmd_params.algoName == "DBStream") { + shared_ptr dbStream = std::make_shared(cmd_params); + return (SESAME::AlgorithmPtr) dbStream; + } throw std::invalid_argument("Unsupported"); } diff --git a/src/Algorithm/CMakeLists.txt b/src/Algorithm/CMakeLists.txt index a31acc24..a6f7bf78 100644 --- a/src/Algorithm/CMakeLists.txt +++ b/src/Algorithm/CMakeLists.txt @@ -3,6 +3,7 @@ add_source_sesame( CluStream.cpp Birch.cpp DenStream.cpp + DBStream.cpp Algorithm.cpp AlgorithmFactory.cpp ) diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp new file mode 100644 index 00000000..504ef7a9 --- /dev/null +++ b/src/Algorithm/DBStream.cpp @@ -0,0 +1,113 @@ +// +// Created by 1124a on 2021/8/30. +// +#include +#include +#include + +SESAME::DBStream::DBStream(param_t &cmd_params){ + this->dbStreamParams.radius=cmd_params.radius; + this->dbStreamParams.lambda=cmd_params.lambda; + this->dbStreamParams.cleanUpInterval=cmd_params.cleanUpInterval; + this->dbStreamParams.weightMin=cmd_params.weightMin; + this->dbStreamParams.alpha=cmd_params.alpha; + this->dbStreamParams.base=cmd_params.base; +} +SESAME::DBStream:: ~DBStream() += default; +void SESAME::DBStream::Initilize() { + this->dampedWindow = WindowFactory::createDampedWindow(dbStreamParams.base, dbStreamParams.lambda); + this->startTime = clock(); + this->weakEntry= pow(dbStreamParams.base,(-1)*dbStreamParams.lambda*dbStreamParams.cleanUpInterval); + this->aWeakEntry=weakEntry*dbStreamParams.alpha; + microClusters=unordered_set(); + this->microClusterIndex=-1; + } + + void SESAME::DBStream::runOnlineClustering(PointPtr input) { + input; + if (!this->isInitial) { + Initilize(); + this->isInitial = true; + } + else + { + + } +} +void SESAME::DBStream::runOfflineClustering(DataSinkPtr sinkPtr) { + sinkPtr; +} +void SESAME::DBStream::update(PointPtr dataPoint){ + if(dataPoint->getIndex()==0) + { + this->pointArrivingTime=clock(); + } + double decayFactor=dampedWindow->decayFunction(this->pointArrivingTime, clock()); + this->pointArrivingTime=clock(); + std::vector microClusterNN=findFixedRadiusNN(dataPoint,decayFactor); + int sizeNN=microClusterNN.size(); + if (microClusterNN.size() < 1) { + microClusterIndex++; + MicroClusterPtr newMicroCluster=SESAME::DataStructureFactory::createMicroCluster(dbStreamParams.dimension, + microClusterIndex,dataPoint, + dbStreamParams.radius); + microClusters.insert(newMicroCluster); + microClusterNN.push_back(newMicroCluster); + } + else { + for (int i = 0; i < sizeNN; i++) { + MicroClusterPtr microCluster = microClusterNN.at(i); + microCluster->insert(dataPoint); // just update weight + // update shared density + for (int j = i + 1; j < sizeNN; j++) { + MicroClusterPairPtr microClusterPair =SESAME::DataStructureFactory::createMicroClusterPair(microCluster, + microClusterNN.at(j)); + if (weightedAdjacencyList.find(microClusterPair) != weightedAdjacencyList.end()) { + clock_t startTime= weightedAdjacencyList[microClusterPair]->updateTime; + double decayValue = dampedWindow->decayFunction(startTime,this->pointArrivingTime); + weightedAdjacencyList[microClusterPair]->add(this->pointArrivingTime,decayValue); + } else { + AdjustedWeightPtr adjustedWeight = SESAME::DataStructureFactory::createAdjustedWeight(1,this->pointArrivingTime); + DensityGraph densityGraph(microClusterPair,adjustedWeight); + weightedAdjacencyList.insert(densityGraph); + } + } + } + if (checkMove(microClusterNN)) { + for (MicroClusterPtr microCluster : microClusterNN) { + microCluster->move(); + } + } + if (pointArrivingTime% dbStreamParams.cleanUpInterval == 0 && dataPoint->getIndex()!=0) { + cleanUp(pointArrivingTime); + + } + // return microClusterNN; + } +} +bool SESAME::DBStream::checkMove( std::vector microClusters){ + int size = microClusters.size(); + for (int i = 0; i < size; i++) { + for (int j = i + 1; j < size; j++) { + if (microClusters.at(i)->getDistance(microClusters.at(j)) < dbStreamParams.radius) { + return false; + } + } + } + return true; +} + +//TODO Stop here +std::vector SESAME::DBStream::findFixedRadiusNN(PointPtr dataPoint, double decayFactor) +{ + std::vector microClusterList; + dataPoint; + decayFactor; + return microClusterList; + +} +void SESAME::DBStream::cleanUp(clock_t time){ + time; +} + diff --git a/src/Algorithm/DataStructure/CMakeLists.txt b/src/Algorithm/DataStructure/CMakeLists.txt index 120bafaa..3336d264 100644 --- a/src/Algorithm/DataStructure/CMakeLists.txt +++ b/src/Algorithm/DataStructure/CMakeLists.txt @@ -4,6 +4,7 @@ add_source_sesame( CoresetTree.cpp MicroCluster.cpp Snapshot.cpp + WeightedAdjacencyList.cpp DataStructureFactory.cpp CFTree.cpp FeatureVector.cpp diff --git a/src/Algorithm/DataStructure/DataStructureFactory.cpp b/src/Algorithm/DataStructure/DataStructureFactory.cpp index b0397a81..bf973964 100644 --- a/src/Algorithm/DataStructure/DataStructureFactory.cpp +++ b/src/Algorithm/DataStructure/DataStructureFactory.cpp @@ -33,7 +33,9 @@ void SESAME::DataStructureFactory::clearCoresetTree(SESAME::CoresetTreePtr tree) SESAME::MicroClusterPtr SESAME::DataStructureFactory::createMicroCluster(int id, int dimension){ return std::make_shared( id, dimension); } - +SESAME::MicroClusterPtr SESAME::DataStructureFactory::createMicroCluster(int dimension, int id,PointPtr dataPoint,double radius){ + return std::make_shared( dimension, id,dataPoint,radius); +} void SESAME::DataStructureFactory::clearMicroCluster(SESAME::MicroClusterPtr microCluster){ microCluster.reset(); } @@ -51,3 +53,18 @@ SESAME::CFTreePtr SESAME::DataStructureFactory::createCFTree() { SESAME::NodePtr SESAME::DataStructureFactory::createNode() { return std::make_shared(); } +SESAME::MicroClusterPairPtr SESAME::DataStructureFactory::createMicroClusterPair(MicroClusterPtr microCluster1, + MicroClusterPtr microCluster2){ + return std::make_shared(microCluster1,microCluster2); +} + +void SESAME::DataStructureFactory::clearMicroClusterPair(MicroClusterPairPtr microClusterPair){ + microClusterPair.reset(); +} + +SESAME::AdjustedWeightPtr SESAME::DataStructureFactory::createAdjustedWeight(double weight, clock_t pointTime){ + return std::make_shared(weight,pointTime); +} +void SESAME::DataStructureFactory::clearAdjustedWeight(SESAME::AdjustedWeightPtr adjustedWeight){ + adjustedWeight.reset(); +} \ No newline at end of file diff --git a/src/Algorithm/DataStructure/MicroCluster.cpp b/src/Algorithm/DataStructure/MicroCluster.cpp index 147fea31..693eff23 100644 --- a/src/Algorithm/DataStructure/MicroCluster.cpp +++ b/src/Algorithm/DataStructure/MicroCluster.cpp @@ -17,6 +17,23 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id) this->visited=false; this->createTime=clock(); this->lastUpdateTime=this->createTime; + radius=0; +} +SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,double radius){ + this->dimension=dimension; + weight=1; + this->id.push_back(id); + LST=0; + SST=0; + this->visited=false; + this->createTime=clock(); + this->lastUpdateTime=this->createTime; + this->radius=radius; + for (int i = 0; i < dimension; i++) { + double data = dataPoint->getFeatureItem(i); + LS.push_back(data); + centroid.push_back(data); + } } //Release memory of the current micro cluster @@ -40,6 +57,7 @@ void SESAME::MicroCluster::init(PointPtr datapoint,int timestamp) SST+=timestamp*timestamp; } + //insert a new data point from input data stream void SESAME::MicroCluster::insert(PointPtr datapoint,int timestamp) { @@ -53,8 +71,29 @@ void SESAME::MicroCluster::insert(PointPtr datapoint,int timestamp) LST+=timestamp; SST+=timestamp*timestamp; centroid=std::move(getCentroid()); - } +void SESAME::MicroCluster::insert(PointPtr datapoint) +{ + weight++; + double val = exp(-(pow(3 * this->distance / radius, 2) / 2)); + for(int i=0; igetFeatureItem(i); + LS[i] = centroid.at(i) + val * (data - centroid.at(i)); + } +} +double SESAME::MicroCluster::getDistance(PointPtr datapoint){ + this->distance=calCentroidDistance(datapoint); + return this->distance; +} +double SESAME::MicroCluster::getDistance(MicroClusterPtr other){ + double temp = 0, dist = 0; + for (int i = 0; i < this->dimension; i++) { + temp = this->centroid[i] - other->centroid[i]; + dist += temp * temp; + } + return sqrt(dist); +} + bool SESAME::MicroCluster::insert(PointPtr datapoint,double decayFactor,double epsilon){ bool result; dataPoint LSPre; LSPre.assign(this->LS.begin(),this->LS.end()); @@ -97,7 +136,7 @@ void SESAME::MicroCluster::merge(MicroClusterPtr other){ } //Calculate the process of micro cluster N(Tc-h') -void SESAME::MicroCluster::substractClusterVector(MicroClusterPtr other) +void SESAME::MicroCluster::subtractClusterVector(MicroClusterPtr other) { this->weight-=other->weight; for(int i=0; icentroid=this->LS; +} + + double SESAME::MicroCluster::inverseError(double x){ double z = sqrt(M_PI) * x; double res = (z) / 2; diff --git a/src/Algorithm/DataStructure/Snapshot.cpp b/src/Algorithm/DataStructure/Snapshot.cpp index 694b5e3e..d72658db 100644 --- a/src/Algorithm/DataStructure/Snapshot.cpp +++ b/src/Algorithm/DataStructure/Snapshot.cpp @@ -59,13 +59,13 @@ SESAME::SnapshotPtr SESAME::Snapshot::substractSnapshot(SnapshotPtr snapshotCurr for(unsigned int j=0; jmicroClusters[j]->id.size()>1) { if(snapshotCurrent->microClusters[i]->judgeMerge(snapshotLandmark->microClusters[j])) - snapshotCurrent->microClusters[i]->substractClusterVector(snapshotLandmark->microClusters[j]); + snapshotCurrent->microClusters[i]->subtractClusterVector(snapshotLandmark->microClusters[j]); } else { int clusterIdLandmark; clusterIdLandmark = snapshotLandmark->microClusters[j]->id[0]; if(std::find(snapshotCurrent->microClusters[i]->id.begin(), snapshotCurrent->microClusters[i]->id.end(), clusterIdLandmark)!=snapshotCurrent->microClusters[i]->id.end()) - snapshotCurrent->microClusters[i]->substractClusterVector(snapshotLandmark->microClusters[j]); + snapshotCurrent->microClusters[i]->subtractClusterVector(snapshotLandmark->microClusters[j]); } } } @@ -75,7 +75,7 @@ SESAME::SnapshotPtr SESAME::Snapshot::substractSnapshot(SnapshotPtr snapshotCurr if(snapshotLandmark->microClusters[j]->id.size()==1) { int clusterIdLandmark=snapshotLandmark->microClusters[j]->id[0]; if(snapshotCurrent->microClusters[i]->id[0]==clusterIdLandmark) - snapshotCurrent->microClusters[i]->substractClusterVector(snapshotLandmark->microClusters[j]); + snapshotCurrent->microClusters[i]->subtractClusterVector(snapshotLandmark->microClusters[j]); } } } diff --git a/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp new file mode 100644 index 00000000..b6529409 --- /dev/null +++ b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp @@ -0,0 +1,35 @@ +// +// Created by 1124a on 2021/8/30. +// +#include +SESAME::MicroClusterPair::MicroClusterPair( MicroClusterPtr microCluster1,MicroClusterPtr microCluster2){ + this->microCluster1=microCluster1->copy(); + this->microCluster2=microCluster2->copy(); +} + +bool SESAME::MicroClusterPair::equal(MicroClusterPairPtr other){ + bool equal=false; + if(other->microCluster1==this->microCluster1&&other->microCluster2==this->microCluster2) + equal=true; + if(other->microCluster1==this->microCluster2&&other->microCluster2==this->microCluster1) + equal=true; + return equal; +} +SESAME::AdjustedWeight::AdjustedWeight(double weight, clock_t pointTime){ + this->weight=weight; + this->updateTime=pointTime; +} +void SESAME::AdjustedWeight::add(clock_t startTime,double decayValue) { + if ( startTime == this->updateTime) { + weight++; + } + else { + weight *= decayValue + 1; + this->updateTime = clock(); + } +} + + +double SESAME::AdjustedWeight::getCurrentWeight(double decayFactor){ + return weight * decayFactor; +} diff --git a/src/Algorithm/DenStream.cpp b/src/Algorithm/DenStream.cpp index 37ec2f89..bd764a20 100644 --- a/src/Algorithm/DenStream.cpp +++ b/src/Algorithm/DenStream.cpp @@ -118,9 +118,9 @@ void SESAME::DenStream::runOnlineClustering(PointPtr input) { } } } - // SESAME_INFO("Insert Succeed "<pMicroClusters.empty()) { index=mergeToMicroCluster(dataPoint,this->pMicroClusters); - //SESAME_INFO("Merge into PMC! "<oMicroClusters.empty()) { index=mergeToMicroCluster(dataPoint,this->oMicroClusters); - // SESAME_INFO("Merge into OMC! "<=0) { double decayFactor= this->dampedWindow->decayFunction(this->oMicroClusters.at(index)->lastUpdateTime,pointArrivingTime); - //SESAME_INFO("Merge INTO OMC! "<oMicroClusters.at(index)->weight)*decayFactor>minWeight) { // SESAME_INFO("erase OMC and turn into PMC! "); @@ -156,7 +156,7 @@ void SESAME::DenStream::merge(PointPtr dataPoint){ MicroClusterPtr newOMicroCluster=DataStructureFactory::createMicroCluster(denStreamParams.dimension, oMicroClusterIndex); newOMicroCluster->init(dataPoint, 0); oMicroClusters.push_back(newOMicroCluster->copy()); - // SESAME_INFO("Create new OMC! "< microClusters){ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 575bebd9..3deaa2e6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,9 +1,9 @@ # adding the Google_Tests_run target add_executable(Google_Tests_run + SystemTest/CluStreamTest.cpp + SystemTest/StreamKMTest.cpp + SystemTest/BirchTest.cpp SystemTest/DenStreamTest.cpp - #SystemTest/CluStreamTest.cpp - # SystemTest/StreamKMTest.cpp - #SystemTest/BirchTest.cpp ) # linking Google_Tests_run with sesame_lib which will be tested From 9d6eb5a5b28951b8a44d6e046e5fcd5d8df41a4f Mon Sep 17 00:00:00 2001 From: s1926539 Date: Tue, 31 Aug 2021 15:57:09 +0800 Subject: [PATCH 02/21] Finish Coding online part of DBStream --- include/Algorithm/DBStream.hpp | 6 +- .../Algorithm/DataStructure/MicroCluster.hpp | 1 + src/Algorithm/CluStream.cpp | 34 +++--- src/Algorithm/DBStream.cpp | 100 +++++++++++------- src/Algorithm/DataStructure/MicroCluster.cpp | 4 +- .../DataStructure/WeightedAdjacencyList.cpp | 2 + 6 files changed, 86 insertions(+), 61 deletions(-) diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp index 85f65211..815334ae 100644 --- a/include/Algorithm/DBStream.hpp +++ b/include/Algorithm/DBStream.hpp @@ -32,6 +32,7 @@ class DBStream : public Algorithm DampedWindowPtr dampedWindow; unordered_set microClusters; SESAME::WeightedAdjacencyList weightedAdjacencyList; + std::vector microClusterNN; double weakEntry;//W_weak, weak entries double aWeakEntry; clock_t startTime; @@ -45,11 +46,10 @@ class DBStream : public Algorithm void runOfflineClustering(DataSinkPtr sinkPtr) override; private: bool isInitial = false; - // vector initialBuffer; void update(PointPtr dataPoint); - void cleanUp(clock_t time); - bool checkMove( std::vector microClusters); + bool checkMove( std::vector microClusters) const; std::vector findFixedRadiusNN(PointPtr dataPoint, double decayFactor); + void cleanUp(clock_t nowTime); }; } diff --git a/include/Algorithm/DataStructure/MicroCluster.hpp b/include/Algorithm/DataStructure/MicroCluster.hpp index 2af59d63..15037343 100644 --- a/include/Algorithm/DataStructure/MicroCluster.hpp +++ b/include/Algorithm/DataStructure/MicroCluster.hpp @@ -72,6 +72,7 @@ class MicroCluster { double getDistance(PointPtr datapoint);//DBStream double getDistance(MicroClusterPtr other);//DBStream void move();//DBStream + void decayWeight(double decayFactor); SESAME::MicroClusterPtr copy(); private: diff --git a/src/Algorithm/CluStream.cpp b/src/Algorithm/CluStream.cpp index 2eeb0d37..72626fba 100644 --- a/src/Algorithm/CluStream.cpp +++ b/src/Algorithm/CluStream.cpp @@ -9,8 +9,8 @@ #include /** * @Description: "offline" init micro clusters using KMeans - * @param size: The size of initial data obects, - * initialData:input intial data + * @param size: The size of initial data objects, + * initialData:input initial data *@Return: void */ SESAME::CluStream::CluStream(param_t &cmd_params) { @@ -98,7 +98,7 @@ double SESAME::CluStream::calRadius(MicroClusterPtr closestCluster) { radius = doubleMax; dataPoint centroid = closestCluster->getCentroid(); for (int i = 0; i < this->CluStreamParam.clusterNumber; i++) { - // SESAME_INFO("i is for wegf"<id == closestCluster->id) { continue; } @@ -118,7 +118,7 @@ void SESAME::CluStream::insertIntoCluster(PointPtr data, MicroClusterPtr operate operateCluster->insert(data, timestamp); } -//Delete oldest cluster and create new one case +//Delete the oldest cluster and create new one case void SESAME::CluStream::deleteCreateCluster(PointPtr data) { // 3.1 Try to forget old micro clusters @@ -172,7 +172,7 @@ void SESAME::CluStream::microClusterToPoint(std::vector µC for (int i = 0; i < this->CluStreamParam.clusterNumber; i++) { PointPtr point = DataStructureFactory::createPoint(i, microClusters[i]->weight, microClusters[i]->centroid.size(), 0); - for (int j = 0; j < microClusters[i]->centroid.size(); j++) + for (SESAME::dataPoint::size_type j = 0; j < microClusters[i]->centroid.size(); j++) point->setFeatureItem(microClusters[i]->centroid[j], j); //points; points.push_back(point); @@ -255,17 +255,17 @@ void SESAME::CluStream::runOfflineClustering(SESAME::DataSinkPtr sinkPtr) { landmarkTime = 0; SESAME_INFO("Start offline..."); SESAME::SnapshotPtr landmarkSnapshot; - SESAME::SnapshotPtr substractMiroCluster; + SESAME::SnapshotPtr subtractMiroCluster; //If offlineTimeWindow ==0, Only Observe the end results of micro clusters - substractMiroCluster = + subtractMiroCluster = DataStructureFactory::createSnapshot(microClusters, (int) ((now - startTime) / CLOCKS_PER_SEC)); SESAME_INFO("Now Miro Cluster is..."); for (int i = 0; i < CluStreamParam.clusterNumber; i++) { std::stringstream result, re2; - std::copy(substractMiroCluster->microClusters[i]->id.begin(), - substractMiroCluster->microClusters[i]->id.end(), + std::copy(subtractMiroCluster->microClusters[i]->id.begin(), + subtractMiroCluster->microClusters[i]->id.end(), std::ostream_iterator(re2, " ")); - SESAME_INFO("The ID is " << re2.str() << "weight is " << substractMiroCluster->microClusters[i]->weight); + SESAME_INFO("The ID is " << re2.str() << "weight is " << subtractMiroCluster->microClusters[i]->weight); } //The offline is to observe a process of data stream clustering @@ -282,21 +282,21 @@ void SESAME::CluStream::runOfflineClustering(SESAME::DataSinkPtr sinkPtr) { SESAME_INFO("The ID is " << re2.str() << "weight is " << landmarkSnapshot->microClusters[i]->weight); } if (landmarkSnapshot->elapsedTime == -1) - landmarkSnapshot = substractMiroCluster; + landmarkSnapshot = subtractMiroCluster; - substractMiroCluster = SESAME::Snapshot::substractSnapshot(substractMiroCluster, landmarkSnapshot, + subtractMiroCluster = SESAME::Snapshot::substractSnapshot(subtractMiroCluster, landmarkSnapshot, this->CluStreamParam.clusterNumber); } - SESAME_INFO("substract Miro Cluster is..."); + SESAME_INFO("subtract Miro Cluster is..."); for (int i = 0; i < CluStreamParam.clusterNumber; i++) { std::stringstream result, re2; - std::copy(substractMiroCluster->microClusters[i]->id.begin(), - substractMiroCluster->microClusters[i]->id.end(), + std::copy(subtractMiroCluster->microClusters[i]->id.begin(), + subtractMiroCluster->microClusters[i]->id.end(), std::ostream_iterator(re2, " ")); - SESAME_INFO("The ID is " << re2.str() << "weight is " << substractMiroCluster->microClusters[i]->weight); + SESAME_INFO("The ID is " << re2.str() << "weight is " << subtractMiroCluster->microClusters[i]->weight); } vector TransformedSnapshot; - microClusterToPoint(substractMiroCluster->microClusters, TransformedSnapshot); + microClusterToPoint(subtractMiroCluster->microClusters, TransformedSnapshot); SESAME_INFO("offline Cluster Number " << this->CluStreamParam.offlineClusterNumber << "Total number of p: " << TransformedSnapshot.size()); diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp index 504ef7a9..fa2adead 100644 --- a/src/Algorithm/DBStream.cpp +++ b/src/Algorithm/DBStream.cpp @@ -5,6 +5,8 @@ #include #include +#pragma clang diagnostic push + SESAME::DBStream::DBStream(param_t &cmd_params){ this->dbStreamParams.radius=cmd_params.radius; this->dbStreamParams.lambda=cmd_params.lambda; @@ -18,40 +20,37 @@ SESAME::DBStream:: ~DBStream() void SESAME::DBStream::Initilize() { this->dampedWindow = WindowFactory::createDampedWindow(dbStreamParams.base, dbStreamParams.lambda); this->startTime = clock(); + this->pointArrivingTime= clock(); this->weakEntry= pow(dbStreamParams.base,(-1)*dbStreamParams.lambda*dbStreamParams.cleanUpInterval); this->aWeakEntry=weakEntry*dbStreamParams.alpha; - microClusters=unordered_set(); this->microClusterIndex=-1; } void SESAME::DBStream::runOnlineClustering(PointPtr input) { - input; + if (!this->isInitial) { Initilize(); this->isInitial = true; } else { - + if(input->getIndex()) + this->pointArrivingTime=clock(); + update(input); } } void SESAME::DBStream::runOfflineClustering(DataSinkPtr sinkPtr) { sinkPtr; } void SESAME::DBStream::update(PointPtr dataPoint){ - if(dataPoint->getIndex()==0) - { - this->pointArrivingTime=clock(); - } double decayFactor=dampedWindow->decayFunction(this->pointArrivingTime, clock()); this->pointArrivingTime=clock(); - std::vector microClusterNN=findFixedRadiusNN(dataPoint,decayFactor); - int sizeNN=microClusterNN.size(); - if (microClusterNN.size() < 1) { + this->microClusterNN=findFixedRadiusNN(dataPoint,decayFactor); + std::vector::size_type sizeNN=microClusterNN.size(); + if (microClusterNN.empty()) { microClusterIndex++; - MicroClusterPtr newMicroCluster=SESAME::DataStructureFactory::createMicroCluster(dbStreamParams.dimension, - microClusterIndex,dataPoint, - dbStreamParams.radius); + MicroClusterPtr newMicroCluster=SESAME::DataStructureFactory::createMicroCluster(dbStreamParams.dimension,microClusterIndex, + dataPoint,dbStreamParams.radius); microClusters.insert(newMicroCluster); microClusterNN.push_back(newMicroCluster); } @@ -64,8 +63,8 @@ void SESAME::DBStream::update(PointPtr dataPoint){ MicroClusterPairPtr microClusterPair =SESAME::DataStructureFactory::createMicroClusterPair(microCluster, microClusterNN.at(j)); if (weightedAdjacencyList.find(microClusterPair) != weightedAdjacencyList.end()) { - clock_t startTime= weightedAdjacencyList[microClusterPair]->updateTime; - double decayValue = dampedWindow->decayFunction(startTime,this->pointArrivingTime); + clock_t startT= weightedAdjacencyList[microClusterPair]->updateTime; + double decayValue = dampedWindow->decayFunction(startT,this->pointArrivingTime); weightedAdjacencyList[microClusterPair]->add(this->pointArrivingTime,decayValue); } else { AdjustedWeightPtr adjustedWeight = SESAME::DataStructureFactory::createAdjustedWeight(1,this->pointArrivingTime); @@ -73,41 +72,62 @@ void SESAME::DBStream::update(PointPtr dataPoint){ weightedAdjacencyList.insert(densityGraph); } } - } - if (checkMove(microClusterNN)) { - for (MicroClusterPtr microCluster : microClusterNN) { - microCluster->move(); - } - } - if (pointArrivingTime% dbStreamParams.cleanUpInterval == 0 && dataPoint->getIndex()!=0) { - cleanUp(pointArrivingTime); - } - // return microClusterNN; + if (checkMove(microClusterNN)) + for (const MicroClusterPtr& microCluster : microClusterNN) microCluster->move(); } + if (((pointArrivingTime-this->startTime)/CLOCKS_PER_SEC)% dbStreamParams.cleanUpInterval == 0 && dataPoint->getIndex()!=0) + cleanUp(this->pointArrivingTime); } -bool SESAME::DBStream::checkMove( std::vector microClusters){ - int size = microClusters.size(); - for (int i = 0; i < size; i++) { - for (int j = i + 1; j < size; j++) { - if (microClusters.at(i)->getDistance(microClusters.at(j)) < dbStreamParams.radius) { - return false; + +bool SESAME::DBStream::checkMove( std::vector microClustersList) const +{ + bool move=true; + if(!microClustersList.empty()) + { std::vector::size_type size = microClustersList.size(); + for (int i = 0; i < size; i++){ + for (int j = i + 1; j < size; j++){ + double distance=microClustersList.at(i)->getDistance(microClustersList.at(j)); + if (distance < dbStreamParams.radius) + move= false; } } } - return true; + return move; } -//TODO Stop here std::vector SESAME::DBStream::findFixedRadiusNN(PointPtr dataPoint, double decayFactor) { - std::vector microClusterList; - dataPoint; - decayFactor; - return microClusterList; - + std::vector result; + unordered_set::iterator iter; + for (iter= this->microClusters.begin();iter!= this->microClusters.end();iter++) { + (*iter)->decayWeight(decayFactor); + double distance = (*iter)->getDistance(dataPoint); + if (distance < dbStreamParams.radius) { + result.push_back(*iter); + } + } + return result; } -void SESAME::DBStream::cleanUp(clock_t time){ - time; +void SESAME::DBStream::cleanUp(clock_t nowTime){ + unordered_set removeMicroCluster; + unordered_set::iterator iter; + for (iter = microClusters.begin(); iter != microClusters.end(); iter++) { + if ((*iter)->weight <= this->weakEntry){ + removeMicroCluster.insert((*iter)->copy()); + microClusters.erase(iter); + } + } + WeightedAdjacencyList::iterator interW; + for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++){ + if (removeMicroCluster.find(interW->first->microCluster1) !=removeMicroCluster.end() + || removeMicroCluster.find(interW->first->microCluster2)!=removeMicroCluster.end()) + weightedAdjacencyList.erase(interW); + else{ + double decayFactor=dampedWindow->decayFunction(interW->second->updateTime,nowTime); + if (interW->second->getCurrentWeight(decayFactor) < aWeakEntry) + weightedAdjacencyList.erase(interW); + } + } } diff --git a/src/Algorithm/DataStructure/MicroCluster.cpp b/src/Algorithm/DataStructure/MicroCluster.cpp index 693eff23..60e0f6f4 100644 --- a/src/Algorithm/DataStructure/MicroCluster.cpp +++ b/src/Algorithm/DataStructure/MicroCluster.cpp @@ -299,7 +299,9 @@ void SESAME::MicroCluster::move(){ this->centroid=this->LS; } - +void SESAME::MicroCluster::decayWeight(double decayFactor){ + this->weight *=decayFactor; +} double SESAME::MicroCluster::inverseError(double x){ double z = sqrt(M_PI) * x; double res = (z) / 2; diff --git a/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp index b6529409..a5cdadbd 100644 --- a/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp +++ b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp @@ -33,3 +33,5 @@ void SESAME::AdjustedWeight::add(clock_t startTime,double decayValue) { double SESAME::AdjustedWeight::getCurrentWeight(double decayFactor){ return weight * decayFactor; } + + From 5876359df4dea13dd6cb30adf604dff90d0b0590 Mon Sep 17 00:00:00 2001 From: s1926539 Date: Wed, 1 Sep 2021 13:00:49 +0800 Subject: [PATCH 03/21] Finish DBStream coding, still has problems in OfflineClustering, how to transfer microCluster into Point --- include/Algorithm/DBStream.hpp | 11 ++- src/Algorithm/DBStream.cpp | 73 ++++++++++++++++++++ src/Algorithm/DataStructure/MicroCluster.cpp | 4 +- 3 files changed, 86 insertions(+), 2 deletions(-) diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp index 815334ae..683def98 100644 --- a/include/Algorithm/DBStream.hpp +++ b/include/Algorithm/DBStream.hpp @@ -16,6 +16,7 @@ #include #include namespace SESAME { +typedef std::vector> Clusters; class DBStreamParams : public AlgorithmParameters { public: double radius; @@ -32,12 +33,14 @@ class DBStream : public Algorithm DampedWindowPtr dampedWindow; unordered_set microClusters; SESAME::WeightedAdjacencyList weightedAdjacencyList; - std::vector microClusterNN; + std::vector microClusterNN;//micro clusters found in function findFixedRadiusNN double weakEntry;//W_weak, weak entries double aWeakEntry; clock_t startTime; clock_t pointArrivingTime; int microClusterIndex; + //Final output of clusters + Clusters finalClusters; //TODO Need to implement weighted a weighted adjacency list S DBStream(param_t &cmd_params); ~DBStream(); @@ -50,6 +53,12 @@ class DBStream : public Algorithm bool checkMove( std::vector microClusters) const; std::vector findFixedRadiusNN(PointPtr dataPoint, double decayFactor); void cleanUp(clock_t nowTime); + void reCluster(double threshold); + static void insertIntoGraph(unordered_map> connectivityGraph, + MicroClusterPtr microCluster,MicroClusterPtr Other); + static void insertIntoGraph(unordered_map> connectivityGraph, + MicroClusterPtr microCluster); + void findConnectedComponents(unordered_map> connectivityGraph); }; } diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp index fa2adead..7713aafa 100644 --- a/src/Algorithm/DBStream.cpp +++ b/src/Algorithm/DBStream.cpp @@ -40,8 +40,13 @@ void SESAME::DBStream::Initilize() { } } void SESAME::DBStream::runOfflineClustering(DataSinkPtr sinkPtr) { + sinkPtr; + reCluster(dbStreamParams.alpha); } + + + void SESAME::DBStream::update(PointPtr dataPoint){ double decayFactor=dampedWindow->decayFunction(this->pointArrivingTime, clock()); this->pointArrivingTime=clock(); @@ -131,3 +136,71 @@ void SESAME::DBStream::cleanUp(clock_t nowTime){ } } +void SESAME::DBStream::reCluster(double threshold){ + unordered_map> connectivityGraph;//C in DBStream paper + WeightedAdjacencyList::iterator interW; + for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++){ + MicroClusterPtr microCluster1 =interW->first->microCluster1->copy(); + MicroClusterPtr microCluster2 =interW->first->microCluster2->copy(); + if (microCluster1->weight >= dbStreamParams.weightMin &µCluster2->weight >= dbStreamParams.weightMin){ + double val = 2*interW->second->weight / (microCluster1->weight+microCluster2->weight); + if (val > threshold) { + insertIntoGraph(connectivityGraph,microCluster1,microCluster2); + insertIntoGraph(connectivityGraph,microCluster2,microCluster1); + } + else + { + insertIntoGraph(connectivityGraph,microCluster1); + insertIntoGraph(connectivityGraph,microCluster2); + } + } + } + findConnectedComponents(connectivityGraph); + +} +void SESAME::DBStream::insertIntoGraph(unordered_map> connectivityGraph, + MicroClusterPtr microCluster,MicroClusterPtr Other){ + if (connectivityGraph.find(microCluster)!=connectivityGraph.end()) + connectivityGraph.find(microCluster)->second.insert(Other); + else{ + microCluster->visited = false; + unordered_set newMicroClusterSet; + newMicroClusterSet.insert(Other); + connectivityGraph.insert(std::make_pair(microCluster,newMicroClusterSet)); + } +} +void SESAME::DBStream::insertIntoGraph(unordered_map> connectivityGraph, + MicroClusterPtr microCluster){ + if (connectivityGraph.find(microCluster)==connectivityGraph.end()) + { + microCluster->visited = false; + unordered_set newMicroClusterSet; + connectivityGraph.insert(std::make_pair(microCluster,newMicroClusterSet)); + } +} + +void SESAME::DBStream::findConnectedComponents(unordered_map> connectivityGraph){ + unordered_map>::iterator inter; + for (inter = connectivityGraph.begin(); inter != connectivityGraph.end(); inter++){ + if (!inter->first->visited) { + std::vector newCluster, clusterGroup; + newCluster.push_back(inter->first); + while (!newCluster.empty()) { + //after found the front one, insert it into clusterGroup and delete from the original vector + MicroClusterPtr microCluster = newCluster.front(); + newCluster.erase(newCluster.begin()); + clusterGroup.push_back(microCluster); + microCluster->visited = true; + + for(const auto & interS : inter->second) + { + if (!interS->visited) + newCluster.push_back(interS); + } + } + this->finalClusters.push_back(clusterGroup); + } + } + +} diff --git a/src/Algorithm/DataStructure/MicroCluster.cpp b/src/Algorithm/DataStructure/MicroCluster.cpp index 60e0f6f4..8df58fa6 100644 --- a/src/Algorithm/DataStructure/MicroCluster.cpp +++ b/src/Algorithm/DataStructure/MicroCluster.cpp @@ -18,6 +18,7 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id) this->createTime=clock(); this->lastUpdateTime=this->createTime; radius=0; + visited=false; } SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,double radius){ this->dimension=dimension; @@ -25,7 +26,7 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,doub this->id.push_back(id); LST=0; SST=0; - this->visited=false; + this->visited=true; this->createTime=clock(); this->lastUpdateTime=this->createTime; this->radius=radius; @@ -34,6 +35,7 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,doub LS.push_back(data); centroid.push_back(data); } + } //Release memory of the current micro cluster From 5823f2e2b70dc0eb1e6ea2adb1db6f58ba0511dc Mon Sep 17 00:00:00 2001 From: Shuhao Zhang Date: Wed, 1 Sep 2021 17:16:44 +0800 Subject: [PATCH 04/21] #56 code reformat --- CMakeLists.txt | 1 - include/Evaluation/Evaluation.hpp | 3 +++ include/Sources/DataSource.hpp | 1 + include/Utils/BenchmarkUtils.hpp | 3 ++- src/Evaluation/Evaluation.cpp | 3 --- src/Sources/DataSource.cpp | 3 +++ src/Utils/BenchmarkUtils.cpp | 26 ++++++++++++++++++-------- 7 files changed, 27 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ad2c7026..6627ea17 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,6 @@ cmake_minimum_required(VERSION 3.14) project(SESAME CXX) # Judge the os system - if (UNIX AND NOT APPLE) add_definitions(-DUSELOG4CXX) MESSAGE(STATUS "Enalbe Log4CXX") diff --git a/include/Evaluation/Evaluation.hpp b/include/Evaluation/Evaluation.hpp index 5001e046..ebcaad48 100644 --- a/include/Evaluation/Evaluation.hpp +++ b/include/Evaluation/Evaluation.hpp @@ -9,6 +9,9 @@ #include #include namespace SESAME { + +enum evaluateType { euclideanCost }; + class Evaluation { public: static void euclideanCost(int numberOfPoints, diff --git a/include/Sources/DataSource.hpp b/include/Sources/DataSource.hpp index 70acd84d..11f378ed 100644 --- a/include/Sources/DataSource.hpp +++ b/include/Sources/DataSource.hpp @@ -31,6 +31,7 @@ class DataSource { void load(int point_number, int dimension, vector input); bool empty(); PointPtr get(); + std::vector getInputs(); DataSource(); ~DataSource(); void runningRoutine(); diff --git a/include/Utils/BenchmarkUtils.hpp b/include/Utils/BenchmarkUtils.hpp index 134b577c..183d2a7c 100644 --- a/include/Utils/BenchmarkUtils.hpp +++ b/include/Utils/BenchmarkUtils.hpp @@ -19,6 +19,7 @@ #include #include #include +#include struct param_t { int pointNumber; @@ -51,7 +52,7 @@ struct param_t { std::string inputPath; std::string outputPath; std::string algoName; - + SESAME::evaluateType evaluateType; }; class BenchmarkUtils { diff --git a/src/Evaluation/Evaluation.cpp b/src/Evaluation/Evaluation.cpp index 72c2a2ef..ada4dcd1 100644 --- a/src/Evaluation/Evaluation.cpp +++ b/src/Evaluation/Evaluation.cpp @@ -1,8 +1,5 @@ // Copyright (C) 2021 by the IntelliStream team (https://github.com/intellistream) -// -// Created by Shuhao Zhang on 26/07/2021. -// #include #include #include diff --git a/src/Sources/DataSource.cpp b/src/Sources/DataSource.cpp index 947995f3..5a6b1d55 100644 --- a/src/Sources/DataSource.cpp +++ b/src/Sources/DataSource.cpp @@ -89,4 +89,7 @@ void SESAME::DataSource::setBarrier(SESAME::BarrierPtr barrierPtr) { SESAME::DataSource::~DataSource() { stop(); } +vector SESAME::DataSource::getInputs() { + return input; +} diff --git a/src/Utils/BenchmarkUtils.cpp b/src/Utils/BenchmarkUtils.cpp index b6012798..ff5c7022 100644 --- a/src/Utils/BenchmarkUtils.cpp +++ b/src/Utils/BenchmarkUtils.cpp @@ -140,16 +140,17 @@ void BenchmarkUtils::defaultParam(param_t &cmd_params) { cmd_params.maxLeafNodes = 3; cmd_params.maxInternalNodes = 3; cmd_params.thresholdDistance = 6550; - cmd_params.minPoints=10; - cmd_params.epsilon=50; - cmd_params.base=2; - cmd_params.lambda=1.8; - cmd_params.mu=7; - cmd_params.beta=5; + cmd_params.minPoints = 10; + cmd_params.epsilon = 50; + cmd_params.base = 2; + cmd_params.lambda = 1.8; + cmd_params.mu = 7; + cmd_params.beta = 5; cmd_params.inputPath = std::filesystem::current_path().generic_string() + "/datasets/CoverType.txt"; SESAME_INFO("Default Input Data Directory: " + cmd_params.inputPath); cmd_params.outputPath = "results.txt"; cmd_params.algoName = "Birch";//StreamKMeans CluStream Birch + cmd_params.evaluateType = SESAME::euclideanCost; } /* command line handling functions */ @@ -200,8 +201,17 @@ void BenchmarkUtils::runBenchmark(param_t &cmd_params, //Store results. algoPtr->store(cmd_params.outputPath, cmd_params.dimension, sinkPtr->getResults()); - SESAME_INFO("Finished store results: "<getResults().size()); - + SESAME_INFO("Finished store results: " << sinkPtr->getResults().size()); + + switch (cmd_params.evaluateType) { + case SESAME::euclideanCost: + SESAME::Evaluation::euclideanCost(cmd_params.pointNumber, + sinkPtr->getResults().size(), + cmd_params.dimension, + sourcePtr->getInputs(), + sinkPtr->getResults()); + break; + } engine.stop(); } From a3c1392b52130389ca6c65fd272a84e7b0208509 Mon Sep 17 00:00:00 2001 From: s1926539 Date: Tue, 31 Aug 2021 00:56:10 +0800 Subject: [PATCH 05/21] Finish Coding half of DBStream --- include/Algorithm/DBStream.hpp | 15 +- .../Algorithm/DataStructure/MicroCluster.hpp | 1 - src/Algorithm/DBStream.cpp | 169 ++++-------------- src/Algorithm/DataStructure/MicroCluster.cpp | 8 +- .../DataStructure/WeightedAdjacencyList.cpp | 2 - 5 files changed, 43 insertions(+), 152 deletions(-) diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp index 683def98..85f65211 100644 --- a/include/Algorithm/DBStream.hpp +++ b/include/Algorithm/DBStream.hpp @@ -16,7 +16,6 @@ #include #include namespace SESAME { -typedef std::vector> Clusters; class DBStreamParams : public AlgorithmParameters { public: double radius; @@ -33,14 +32,11 @@ class DBStream : public Algorithm DampedWindowPtr dampedWindow; unordered_set microClusters; SESAME::WeightedAdjacencyList weightedAdjacencyList; - std::vector microClusterNN;//micro clusters found in function findFixedRadiusNN double weakEntry;//W_weak, weak entries double aWeakEntry; clock_t startTime; clock_t pointArrivingTime; int microClusterIndex; - //Final output of clusters - Clusters finalClusters; //TODO Need to implement weighted a weighted adjacency list S DBStream(param_t &cmd_params); ~DBStream(); @@ -49,16 +45,11 @@ class DBStream : public Algorithm void runOfflineClustering(DataSinkPtr sinkPtr) override; private: bool isInitial = false; + // vector initialBuffer; void update(PointPtr dataPoint); - bool checkMove( std::vector microClusters) const; + void cleanUp(clock_t time); + bool checkMove( std::vector microClusters); std::vector findFixedRadiusNN(PointPtr dataPoint, double decayFactor); - void cleanUp(clock_t nowTime); - void reCluster(double threshold); - static void insertIntoGraph(unordered_map> connectivityGraph, - MicroClusterPtr microCluster,MicroClusterPtr Other); - static void insertIntoGraph(unordered_map> connectivityGraph, - MicroClusterPtr microCluster); - void findConnectedComponents(unordered_map> connectivityGraph); }; } diff --git a/include/Algorithm/DataStructure/MicroCluster.hpp b/include/Algorithm/DataStructure/MicroCluster.hpp index 15037343..2af59d63 100644 --- a/include/Algorithm/DataStructure/MicroCluster.hpp +++ b/include/Algorithm/DataStructure/MicroCluster.hpp @@ -72,7 +72,6 @@ class MicroCluster { double getDistance(PointPtr datapoint);//DBStream double getDistance(MicroClusterPtr other);//DBStream void move();//DBStream - void decayWeight(double decayFactor); SESAME::MicroClusterPtr copy(); private: diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp index 7713aafa..504ef7a9 100644 --- a/src/Algorithm/DBStream.cpp +++ b/src/Algorithm/DBStream.cpp @@ -5,8 +5,6 @@ #include #include -#pragma clang diagnostic push - SESAME::DBStream::DBStream(param_t &cmd_params){ this->dbStreamParams.radius=cmd_params.radius; this->dbStreamParams.lambda=cmd_params.lambda; @@ -20,42 +18,40 @@ SESAME::DBStream:: ~DBStream() void SESAME::DBStream::Initilize() { this->dampedWindow = WindowFactory::createDampedWindow(dbStreamParams.base, dbStreamParams.lambda); this->startTime = clock(); - this->pointArrivingTime= clock(); this->weakEntry= pow(dbStreamParams.base,(-1)*dbStreamParams.lambda*dbStreamParams.cleanUpInterval); this->aWeakEntry=weakEntry*dbStreamParams.alpha; + microClusters=unordered_set(); this->microClusterIndex=-1; } void SESAME::DBStream::runOnlineClustering(PointPtr input) { - + input; if (!this->isInitial) { Initilize(); this->isInitial = true; } else { - if(input->getIndex()) - this->pointArrivingTime=clock(); - update(input); + } } void SESAME::DBStream::runOfflineClustering(DataSinkPtr sinkPtr) { - sinkPtr; - reCluster(dbStreamParams.alpha); } - - - void SESAME::DBStream::update(PointPtr dataPoint){ + if(dataPoint->getIndex()==0) + { + this->pointArrivingTime=clock(); + } double decayFactor=dampedWindow->decayFunction(this->pointArrivingTime, clock()); this->pointArrivingTime=clock(); - this->microClusterNN=findFixedRadiusNN(dataPoint,decayFactor); - std::vector::size_type sizeNN=microClusterNN.size(); - if (microClusterNN.empty()) { + std::vector microClusterNN=findFixedRadiusNN(dataPoint,decayFactor); + int sizeNN=microClusterNN.size(); + if (microClusterNN.size() < 1) { microClusterIndex++; - MicroClusterPtr newMicroCluster=SESAME::DataStructureFactory::createMicroCluster(dbStreamParams.dimension,microClusterIndex, - dataPoint,dbStreamParams.radius); + MicroClusterPtr newMicroCluster=SESAME::DataStructureFactory::createMicroCluster(dbStreamParams.dimension, + microClusterIndex,dataPoint, + dbStreamParams.radius); microClusters.insert(newMicroCluster); microClusterNN.push_back(newMicroCluster); } @@ -68,8 +64,8 @@ void SESAME::DBStream::update(PointPtr dataPoint){ MicroClusterPairPtr microClusterPair =SESAME::DataStructureFactory::createMicroClusterPair(microCluster, microClusterNN.at(j)); if (weightedAdjacencyList.find(microClusterPair) != weightedAdjacencyList.end()) { - clock_t startT= weightedAdjacencyList[microClusterPair]->updateTime; - double decayValue = dampedWindow->decayFunction(startT,this->pointArrivingTime); + clock_t startTime= weightedAdjacencyList[microClusterPair]->updateTime; + double decayValue = dampedWindow->decayFunction(startTime,this->pointArrivingTime); weightedAdjacencyList[microClusterPair]->add(this->pointArrivingTime,decayValue); } else { AdjustedWeightPtr adjustedWeight = SESAME::DataStructureFactory::createAdjustedWeight(1,this->pointArrivingTime); @@ -77,130 +73,41 @@ void SESAME::DBStream::update(PointPtr dataPoint){ weightedAdjacencyList.insert(densityGraph); } } - } - if (checkMove(microClusterNN)) - for (const MicroClusterPtr& microCluster : microClusterNN) microCluster->move(); } - if (((pointArrivingTime-this->startTime)/CLOCKS_PER_SEC)% dbStreamParams.cleanUpInterval == 0 && dataPoint->getIndex()!=0) - cleanUp(this->pointArrivingTime); -} - -bool SESAME::DBStream::checkMove( std::vector microClustersList) const -{ - bool move=true; - if(!microClustersList.empty()) - { std::vector::size_type size = microClustersList.size(); - for (int i = 0; i < size; i++){ - for (int j = i + 1; j < size; j++){ - double distance=microClustersList.at(i)->getDistance(microClustersList.at(j)); - if (distance < dbStreamParams.radius) - move= false; + if (checkMove(microClusterNN)) { + for (MicroClusterPtr microCluster : microClusterNN) { + microCluster->move(); } } - } - return move; -} + if (pointArrivingTime% dbStreamParams.cleanUpInterval == 0 && dataPoint->getIndex()!=0) { + cleanUp(pointArrivingTime); -std::vector SESAME::DBStream::findFixedRadiusNN(PointPtr dataPoint, double decayFactor) -{ - std::vector result; - unordered_set::iterator iter; - for (iter= this->microClusters.begin();iter!= this->microClusters.end();iter++) { - (*iter)->decayWeight(decayFactor); - double distance = (*iter)->getDistance(dataPoint); - if (distance < dbStreamParams.radius) { - result.push_back(*iter); } + // return microClusterNN; } - return result; } -void SESAME::DBStream::cleanUp(clock_t nowTime){ - unordered_set removeMicroCluster; - unordered_set::iterator iter; - for (iter = microClusters.begin(); iter != microClusters.end(); iter++) { - if ((*iter)->weight <= this->weakEntry){ - removeMicroCluster.insert((*iter)->copy()); - microClusters.erase(iter); - } - } - WeightedAdjacencyList::iterator interW; - for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++){ - if (removeMicroCluster.find(interW->first->microCluster1) !=removeMicroCluster.end() - || removeMicroCluster.find(interW->first->microCluster2)!=removeMicroCluster.end()) - weightedAdjacencyList.erase(interW); - else{ - double decayFactor=dampedWindow->decayFunction(interW->second->updateTime,nowTime); - if (interW->second->getCurrentWeight(decayFactor) < aWeakEntry) - weightedAdjacencyList.erase(interW); +bool SESAME::DBStream::checkMove( std::vector microClusters){ + int size = microClusters.size(); + for (int i = 0; i < size; i++) { + for (int j = i + 1; j < size; j++) { + if (microClusters.at(i)->getDistance(microClusters.at(j)) < dbStreamParams.radius) { + return false; + } } } + return true; } -void SESAME::DBStream::reCluster(double threshold){ - unordered_map> connectivityGraph;//C in DBStream paper - WeightedAdjacencyList::iterator interW; - for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++){ - MicroClusterPtr microCluster1 =interW->first->microCluster1->copy(); - MicroClusterPtr microCluster2 =interW->first->microCluster2->copy(); - if (microCluster1->weight >= dbStreamParams.weightMin &µCluster2->weight >= dbStreamParams.weightMin){ - double val = 2*interW->second->weight / (microCluster1->weight+microCluster2->weight); - if (val > threshold) { - insertIntoGraph(connectivityGraph,microCluster1,microCluster2); - insertIntoGraph(connectivityGraph,microCluster2,microCluster1); - } - else - { - insertIntoGraph(connectivityGraph,microCluster1); - insertIntoGraph(connectivityGraph,microCluster2); - } - } - } - findConnectedComponents(connectivityGraph); +//TODO Stop here +std::vector SESAME::DBStream::findFixedRadiusNN(PointPtr dataPoint, double decayFactor) +{ + std::vector microClusterList; + dataPoint; + decayFactor; + return microClusterList; } -void SESAME::DBStream::insertIntoGraph(unordered_map> connectivityGraph, - MicroClusterPtr microCluster,MicroClusterPtr Other){ - if (connectivityGraph.find(microCluster)!=connectivityGraph.end()) - connectivityGraph.find(microCluster)->second.insert(Other); - else{ - microCluster->visited = false; - unordered_set newMicroClusterSet; - newMicroClusterSet.insert(Other); - connectivityGraph.insert(std::make_pair(microCluster,newMicroClusterSet)); - } -} -void SESAME::DBStream::insertIntoGraph(unordered_map> connectivityGraph, - MicroClusterPtr microCluster){ - if (connectivityGraph.find(microCluster)==connectivityGraph.end()) - { - microCluster->visited = false; - unordered_set newMicroClusterSet; - connectivityGraph.insert(std::make_pair(microCluster,newMicroClusterSet)); - } +void SESAME::DBStream::cleanUp(clock_t time){ + time; } -void SESAME::DBStream::findConnectedComponents(unordered_map> connectivityGraph){ - unordered_map>::iterator inter; - for (inter = connectivityGraph.begin(); inter != connectivityGraph.end(); inter++){ - if (!inter->first->visited) { - std::vector newCluster, clusterGroup; - newCluster.push_back(inter->first); - while (!newCluster.empty()) { - //after found the front one, insert it into clusterGroup and delete from the original vector - MicroClusterPtr microCluster = newCluster.front(); - newCluster.erase(newCluster.begin()); - clusterGroup.push_back(microCluster); - microCluster->visited = true; - - for(const auto & interS : inter->second) - { - if (!interS->visited) - newCluster.push_back(interS); - } - } - this->finalClusters.push_back(clusterGroup); - } - } - -} diff --git a/src/Algorithm/DataStructure/MicroCluster.cpp b/src/Algorithm/DataStructure/MicroCluster.cpp index 8df58fa6..693eff23 100644 --- a/src/Algorithm/DataStructure/MicroCluster.cpp +++ b/src/Algorithm/DataStructure/MicroCluster.cpp @@ -18,7 +18,6 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id) this->createTime=clock(); this->lastUpdateTime=this->createTime; radius=0; - visited=false; } SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,double radius){ this->dimension=dimension; @@ -26,7 +25,7 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,doub this->id.push_back(id); LST=0; SST=0; - this->visited=true; + this->visited=false; this->createTime=clock(); this->lastUpdateTime=this->createTime; this->radius=radius; @@ -35,7 +34,6 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,doub LS.push_back(data); centroid.push_back(data); } - } //Release memory of the current micro cluster @@ -301,9 +299,7 @@ void SESAME::MicroCluster::move(){ this->centroid=this->LS; } -void SESAME::MicroCluster::decayWeight(double decayFactor){ - this->weight *=decayFactor; -} + double SESAME::MicroCluster::inverseError(double x){ double z = sqrt(M_PI) * x; double res = (z) / 2; diff --git a/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp index a5cdadbd..b6529409 100644 --- a/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp +++ b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp @@ -33,5 +33,3 @@ void SESAME::AdjustedWeight::add(clock_t startTime,double decayValue) { double SESAME::AdjustedWeight::getCurrentWeight(double decayFactor){ return weight * decayFactor; } - - From 2b9ff72d54285927dc7544a7daf0cf57ed509ca6 Mon Sep 17 00:00:00 2001 From: s1926539 Date: Tue, 31 Aug 2021 00:56:10 +0800 Subject: [PATCH 06/21] Finish Coding half of DBStream --- include/Algorithm/DBStream.hpp | 56 +++++++++ .../DataStructure/DataStructureFactory.hpp | 8 +- .../Algorithm/DataStructure/MicroCluster.hpp | 22 ++-- .../DataStructure/WeightedAdjacencyList.hpp | 44 +++++++ include/Algorithm/DenStream.hpp | 3 +- include/Utils/BenchmarkUtils.hpp | 5 + src/Algorithm/AlgorithmFactory.cpp | 5 + src/Algorithm/CMakeLists.txt | 1 + src/Algorithm/DBStream.cpp | 113 ++++++++++++++++++ src/Algorithm/DataStructure/CMakeLists.txt | 1 + .../DataStructure/DataStructureFactory.cpp | 19 ++- src/Algorithm/DataStructure/MicroCluster.cpp | 49 +++++++- src/Algorithm/DataStructure/Snapshot.cpp | 6 +- .../DataStructure/WeightedAdjacencyList.cpp | 35 ++++++ src/Algorithm/DenStream.cpp | 12 +- 15 files changed, 356 insertions(+), 23 deletions(-) create mode 100644 include/Algorithm/DBStream.hpp create mode 100644 include/Algorithm/DataStructure/WeightedAdjacencyList.hpp create mode 100644 src/Algorithm/DBStream.cpp create mode 100644 src/Algorithm/DataStructure/WeightedAdjacencyList.cpp diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp new file mode 100644 index 00000000..85f65211 --- /dev/null +++ b/include/Algorithm/DBStream.hpp @@ -0,0 +1,56 @@ +// +// Created by 1124a on 2021/8/30. +// + +#ifndef SESAME_INCLUDE_ALGORITHM_DBSTREAM_HPP_ +#define SESAME_INCLUDE_ALGORITHM_DBSTREAM_HPP_ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace SESAME { +class DBStreamParams : public AlgorithmParameters { + public: + double radius; + double lambda; + int cleanUpInterval;//Tgap + double weightMin;//minimum weight + double alpha;//α, intersection factor + double base;//base of decay function +}; +class DBStream : public Algorithm + { + public: + DBStreamParams dbStreamParams; + DampedWindowPtr dampedWindow; + unordered_set microClusters; + SESAME::WeightedAdjacencyList weightedAdjacencyList; + double weakEntry;//W_weak, weak entries + double aWeakEntry; + clock_t startTime; + clock_t pointArrivingTime; + int microClusterIndex; + //TODO Need to implement weighted a weighted adjacency list S + DBStream(param_t &cmd_params); + ~DBStream(); + void Initilize() override; + void runOnlineClustering(PointPtr input) override; + void runOfflineClustering(DataSinkPtr sinkPtr) override; + private: + bool isInitial = false; + // vector initialBuffer; + void update(PointPtr dataPoint); + void cleanUp(clock_t time); + bool checkMove( std::vector microClusters); + std::vector findFixedRadiusNN(PointPtr dataPoint, double decayFactor); + }; + +} +#endif //SESAME_INCLUDE_ALGORITHM_DBSTREAM_HPP_ diff --git a/include/Algorithm/DataStructure/DataStructureFactory.hpp b/include/Algorithm/DataStructure/DataStructureFactory.hpp index 6ffea88c..21567023 100644 --- a/include/Algorithm/DataStructure/DataStructureFactory.hpp +++ b/include/Algorithm/DataStructure/DataStructureFactory.hpp @@ -13,7 +13,7 @@ #include #include #include - +#include namespace SESAME { class DataStructureFactory { @@ -28,12 +28,16 @@ class DataStructureFactory { static CoresetTreePtr createCoresetTree(); static void clearCoresetTree(CoresetTreePtr tree); static MicroClusterPtr createMicroCluster(int dimension, int id); + static MicroClusterPtr createMicroCluster(int dimension, int id,PointPtr dataPoint,double radius); static void clearMicroCluster(MicroClusterPtr microCluster); static SnapshotPtr createSnapshot(MicroClusters & otherMicroClusters,int elapsedTime); static void clearSnapshot(SnapshotPtr snapshot); static CFTreePtr createCFTree(); static NodePtr createNode(); - + static MicroClusterPairPtr createMicroClusterPair(MicroClusterPtr microCluster1,MicroClusterPtr microCluster2); + static void clearMicroClusterPair(MicroClusterPairPtr microClusterPair); + static AdjustedWeightPtr createAdjustedWeight(double weight, clock_t pointTime); + static void clearAdjustedWeight(AdjustedWeightPtr adjustedWeight); }; } #endif //SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_DATASTRUCTUREFACTORY_HPP_ diff --git a/include/Algorithm/DataStructure/MicroCluster.hpp b/include/Algorithm/DataStructure/MicroCluster.hpp index 330e3e67..2af59d63 100644 --- a/include/Algorithm/DataStructure/MicroCluster.hpp +++ b/include/Algorithm/DataStructure/MicroCluster.hpp @@ -32,23 +32,27 @@ class MicroCluster { int SST;//the sum of the squares of the time stamps Til... Tin double weight; //number of data point in the clusters int dimension; + double radius;//Used in DBStream + - //TODO Need to subtract Base class of CF vector when all cf-vector based-algorithms have been implemented //the parameters below is unique for DenStream clock_t createTime; clock_t lastUpdateTime; bool visited; - //TODO this may need to modify in the future (All algorithms used this, e.g.DenStream,CluStream,DenStream,DBStream,SWEM =.=) + //TODO 1. Need to subtract Base class of CF vector when all cf-vector based-algorithms have been implemented + // 2.this may need to modify in the future (All algorithms used this, e.g.DenStream,CluStream,DenStream,DBStream,SWEM =.=) + + MicroCluster(int dimension, int id); + MicroCluster(int dimension, int id,PointPtr dataPoint,double radius);//DBStream ~MicroCluster(); void init(PointPtr datapoint, int timestamp); - - void insert(PointPtr datapoint, int timestamp); - bool insert(PointPtr datapoint,double decayFactor,double epsilon);//Used in DenStream - + void insert(PointPtr datapoint, int timestamp);//Used in CluStream + bool insert(PointPtr datapoint,double decayFactor,double epsilon);// DenStream + void insert(PointPtr datapoint);//DBStream void merge(MicroClusterPtr other); - void substractClusterVector(MicroClusterPtr other); + void subtractClusterVector(MicroClusterPtr other); void updateId(MicroClusterPtr other); void resetID(int index); //Used in DenStream @@ -65,9 +69,13 @@ class MicroCluster { dataPoint getVarianceVector(); double calCentroidDistance(PointPtr datapoint); bool judgeMerge(MicroClusterPtr other); + double getDistance(PointPtr datapoint);//DBStream + double getDistance(MicroClusterPtr other);//DBStream + void move();//DBStream SESAME::MicroClusterPtr copy(); private: + double distance; static double inverseError(double x); }; } diff --git a/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp b/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp new file mode 100644 index 00000000..e7566e37 --- /dev/null +++ b/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp @@ -0,0 +1,44 @@ +// +// Created by 1124a on 2021/8/30. +// + +#ifndef SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_WEIGHTEDADJACENCYLIST_HPP_ +#define SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_WEIGHTEDADJACENCYLIST_HPP_ +#include +#include +#include +#include +#include +#include +#include +#include +namespace SESAME { +class MicroClusterPair; +typedef std::shared_ptr MicroClusterPairPtr; +class MicroClusterPair{ + public: + MicroClusterPtr microCluster1; + MicroClusterPtr microCluster2; + MicroClusterPair( MicroClusterPtr microCluster1,MicroClusterPtr microCluster2); + bool equal(MicroClusterPairPtr other); +}; + +class AdjustedWeight; +typedef std::shared_ptr AdjustedWeightPtr; +class AdjustedWeight{ + public: + double weight; + clock_t updateTime; + AdjustedWeight(double weight, clock_t pointTime); + void add(clock_t startTime,double decayValue); + double getCurrentWeight(double decayFactor); +}; + +typedef std::unordered_map WeightedAdjacencyList; +typedef std::pair DensityGraph; +//S in paper, represent Weighted Adjacency List + +} + + +#endif //SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_WEIGHTEDADJACENCYLIST_HPP_ diff --git a/include/Algorithm/DenStream.hpp b/include/Algorithm/DenStream.hpp index a5a633ee..caa70b5a 100644 --- a/include/Algorithm/DenStream.hpp +++ b/include/Algorithm/DenStream.hpp @@ -40,7 +40,7 @@ class DenStreamParams : public AlgorithmParameters { clock_t startTime; clock_t pointArrivingTime; long Tp; - int iterpoint=0;//TODO DELETE LATER + int iterPoint=0;//TODO DELETE LATER int pMicroClusterIndex; int oMicroClusterIndex; @@ -62,7 +62,6 @@ class DenStreamParams : public AlgorithmParameters { int mergeToMicroCluster(PointPtr dataPoint,std::vector microClusters ); static void microClusterToPoint(std::vector µClusters, vector &points); - //TODO overlap functions with Clustream, may need to remove to utils folder }; } #endif //SESAME_INCLUDE_ALGORITHM_DENSTREAM_HPP_ diff --git a/include/Utils/BenchmarkUtils.hpp b/include/Utils/BenchmarkUtils.hpp index b4deaa52..183d2a7c 100644 --- a/include/Utils/BenchmarkUtils.hpp +++ b/include/Utils/BenchmarkUtils.hpp @@ -44,6 +44,11 @@ struct param_t { double lambda; double mu; double beta; + //used in DBStream + double radius; + int cleanUpInterval; + double weightMin; + double alpha; std::string inputPath; std::string outputPath; std::string algoName; diff --git a/src/Algorithm/AlgorithmFactory.cpp b/src/Algorithm/AlgorithmFactory.cpp index 1027e04f..a6696602 100644 --- a/src/Algorithm/AlgorithmFactory.cpp +++ b/src/Algorithm/AlgorithmFactory.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -27,5 +28,9 @@ SESAME::AlgorithmPtr SESAME::AlgorithmFactory::create(param_t &cmd_params) { shared_ptr denStream = std::make_shared(cmd_params); return (SESAME::AlgorithmPtr) denStream; } + if (cmd_params.algoName == "DBStream") { + shared_ptr dbStream = std::make_shared(cmd_params); + return (SESAME::AlgorithmPtr) dbStream; + } throw std::invalid_argument("Unsupported"); } diff --git a/src/Algorithm/CMakeLists.txt b/src/Algorithm/CMakeLists.txt index a31acc24..a6f7bf78 100644 --- a/src/Algorithm/CMakeLists.txt +++ b/src/Algorithm/CMakeLists.txt @@ -3,6 +3,7 @@ add_source_sesame( CluStream.cpp Birch.cpp DenStream.cpp + DBStream.cpp Algorithm.cpp AlgorithmFactory.cpp ) diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp new file mode 100644 index 00000000..504ef7a9 --- /dev/null +++ b/src/Algorithm/DBStream.cpp @@ -0,0 +1,113 @@ +// +// Created by 1124a on 2021/8/30. +// +#include +#include +#include + +SESAME::DBStream::DBStream(param_t &cmd_params){ + this->dbStreamParams.radius=cmd_params.radius; + this->dbStreamParams.lambda=cmd_params.lambda; + this->dbStreamParams.cleanUpInterval=cmd_params.cleanUpInterval; + this->dbStreamParams.weightMin=cmd_params.weightMin; + this->dbStreamParams.alpha=cmd_params.alpha; + this->dbStreamParams.base=cmd_params.base; +} +SESAME::DBStream:: ~DBStream() += default; +void SESAME::DBStream::Initilize() { + this->dampedWindow = WindowFactory::createDampedWindow(dbStreamParams.base, dbStreamParams.lambda); + this->startTime = clock(); + this->weakEntry= pow(dbStreamParams.base,(-1)*dbStreamParams.lambda*dbStreamParams.cleanUpInterval); + this->aWeakEntry=weakEntry*dbStreamParams.alpha; + microClusters=unordered_set(); + this->microClusterIndex=-1; + } + + void SESAME::DBStream::runOnlineClustering(PointPtr input) { + input; + if (!this->isInitial) { + Initilize(); + this->isInitial = true; + } + else + { + + } +} +void SESAME::DBStream::runOfflineClustering(DataSinkPtr sinkPtr) { + sinkPtr; +} +void SESAME::DBStream::update(PointPtr dataPoint){ + if(dataPoint->getIndex()==0) + { + this->pointArrivingTime=clock(); + } + double decayFactor=dampedWindow->decayFunction(this->pointArrivingTime, clock()); + this->pointArrivingTime=clock(); + std::vector microClusterNN=findFixedRadiusNN(dataPoint,decayFactor); + int sizeNN=microClusterNN.size(); + if (microClusterNN.size() < 1) { + microClusterIndex++; + MicroClusterPtr newMicroCluster=SESAME::DataStructureFactory::createMicroCluster(dbStreamParams.dimension, + microClusterIndex,dataPoint, + dbStreamParams.radius); + microClusters.insert(newMicroCluster); + microClusterNN.push_back(newMicroCluster); + } + else { + for (int i = 0; i < sizeNN; i++) { + MicroClusterPtr microCluster = microClusterNN.at(i); + microCluster->insert(dataPoint); // just update weight + // update shared density + for (int j = i + 1; j < sizeNN; j++) { + MicroClusterPairPtr microClusterPair =SESAME::DataStructureFactory::createMicroClusterPair(microCluster, + microClusterNN.at(j)); + if (weightedAdjacencyList.find(microClusterPair) != weightedAdjacencyList.end()) { + clock_t startTime= weightedAdjacencyList[microClusterPair]->updateTime; + double decayValue = dampedWindow->decayFunction(startTime,this->pointArrivingTime); + weightedAdjacencyList[microClusterPair]->add(this->pointArrivingTime,decayValue); + } else { + AdjustedWeightPtr adjustedWeight = SESAME::DataStructureFactory::createAdjustedWeight(1,this->pointArrivingTime); + DensityGraph densityGraph(microClusterPair,adjustedWeight); + weightedAdjacencyList.insert(densityGraph); + } + } + } + if (checkMove(microClusterNN)) { + for (MicroClusterPtr microCluster : microClusterNN) { + microCluster->move(); + } + } + if (pointArrivingTime% dbStreamParams.cleanUpInterval == 0 && dataPoint->getIndex()!=0) { + cleanUp(pointArrivingTime); + + } + // return microClusterNN; + } +} +bool SESAME::DBStream::checkMove( std::vector microClusters){ + int size = microClusters.size(); + for (int i = 0; i < size; i++) { + for (int j = i + 1; j < size; j++) { + if (microClusters.at(i)->getDistance(microClusters.at(j)) < dbStreamParams.radius) { + return false; + } + } + } + return true; +} + +//TODO Stop here +std::vector SESAME::DBStream::findFixedRadiusNN(PointPtr dataPoint, double decayFactor) +{ + std::vector microClusterList; + dataPoint; + decayFactor; + return microClusterList; + +} +void SESAME::DBStream::cleanUp(clock_t time){ + time; +} + diff --git a/src/Algorithm/DataStructure/CMakeLists.txt b/src/Algorithm/DataStructure/CMakeLists.txt index 120bafaa..3336d264 100644 --- a/src/Algorithm/DataStructure/CMakeLists.txt +++ b/src/Algorithm/DataStructure/CMakeLists.txt @@ -4,6 +4,7 @@ add_source_sesame( CoresetTree.cpp MicroCluster.cpp Snapshot.cpp + WeightedAdjacencyList.cpp DataStructureFactory.cpp CFTree.cpp FeatureVector.cpp diff --git a/src/Algorithm/DataStructure/DataStructureFactory.cpp b/src/Algorithm/DataStructure/DataStructureFactory.cpp index b0397a81..bf973964 100644 --- a/src/Algorithm/DataStructure/DataStructureFactory.cpp +++ b/src/Algorithm/DataStructure/DataStructureFactory.cpp @@ -33,7 +33,9 @@ void SESAME::DataStructureFactory::clearCoresetTree(SESAME::CoresetTreePtr tree) SESAME::MicroClusterPtr SESAME::DataStructureFactory::createMicroCluster(int id, int dimension){ return std::make_shared( id, dimension); } - +SESAME::MicroClusterPtr SESAME::DataStructureFactory::createMicroCluster(int dimension, int id,PointPtr dataPoint,double radius){ + return std::make_shared( dimension, id,dataPoint,radius); +} void SESAME::DataStructureFactory::clearMicroCluster(SESAME::MicroClusterPtr microCluster){ microCluster.reset(); } @@ -51,3 +53,18 @@ SESAME::CFTreePtr SESAME::DataStructureFactory::createCFTree() { SESAME::NodePtr SESAME::DataStructureFactory::createNode() { return std::make_shared(); } +SESAME::MicroClusterPairPtr SESAME::DataStructureFactory::createMicroClusterPair(MicroClusterPtr microCluster1, + MicroClusterPtr microCluster2){ + return std::make_shared(microCluster1,microCluster2); +} + +void SESAME::DataStructureFactory::clearMicroClusterPair(MicroClusterPairPtr microClusterPair){ + microClusterPair.reset(); +} + +SESAME::AdjustedWeightPtr SESAME::DataStructureFactory::createAdjustedWeight(double weight, clock_t pointTime){ + return std::make_shared(weight,pointTime); +} +void SESAME::DataStructureFactory::clearAdjustedWeight(SESAME::AdjustedWeightPtr adjustedWeight){ + adjustedWeight.reset(); +} \ No newline at end of file diff --git a/src/Algorithm/DataStructure/MicroCluster.cpp b/src/Algorithm/DataStructure/MicroCluster.cpp index 147fea31..693eff23 100644 --- a/src/Algorithm/DataStructure/MicroCluster.cpp +++ b/src/Algorithm/DataStructure/MicroCluster.cpp @@ -17,6 +17,23 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id) this->visited=false; this->createTime=clock(); this->lastUpdateTime=this->createTime; + radius=0; +} +SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,double radius){ + this->dimension=dimension; + weight=1; + this->id.push_back(id); + LST=0; + SST=0; + this->visited=false; + this->createTime=clock(); + this->lastUpdateTime=this->createTime; + this->radius=radius; + for (int i = 0; i < dimension; i++) { + double data = dataPoint->getFeatureItem(i); + LS.push_back(data); + centroid.push_back(data); + } } //Release memory of the current micro cluster @@ -40,6 +57,7 @@ void SESAME::MicroCluster::init(PointPtr datapoint,int timestamp) SST+=timestamp*timestamp; } + //insert a new data point from input data stream void SESAME::MicroCluster::insert(PointPtr datapoint,int timestamp) { @@ -53,8 +71,29 @@ void SESAME::MicroCluster::insert(PointPtr datapoint,int timestamp) LST+=timestamp; SST+=timestamp*timestamp; centroid=std::move(getCentroid()); - } +void SESAME::MicroCluster::insert(PointPtr datapoint) +{ + weight++; + double val = exp(-(pow(3 * this->distance / radius, 2) / 2)); + for(int i=0; igetFeatureItem(i); + LS[i] = centroid.at(i) + val * (data - centroid.at(i)); + } +} +double SESAME::MicroCluster::getDistance(PointPtr datapoint){ + this->distance=calCentroidDistance(datapoint); + return this->distance; +} +double SESAME::MicroCluster::getDistance(MicroClusterPtr other){ + double temp = 0, dist = 0; + for (int i = 0; i < this->dimension; i++) { + temp = this->centroid[i] - other->centroid[i]; + dist += temp * temp; + } + return sqrt(dist); +} + bool SESAME::MicroCluster::insert(PointPtr datapoint,double decayFactor,double epsilon){ bool result; dataPoint LSPre; LSPre.assign(this->LS.begin(),this->LS.end()); @@ -97,7 +136,7 @@ void SESAME::MicroCluster::merge(MicroClusterPtr other){ } //Calculate the process of micro cluster N(Tc-h') -void SESAME::MicroCluster::substractClusterVector(MicroClusterPtr other) +void SESAME::MicroCluster::subtractClusterVector(MicroClusterPtr other) { this->weight-=other->weight; for(int i=0; icentroid=this->LS; +} + + double SESAME::MicroCluster::inverseError(double x){ double z = sqrt(M_PI) * x; double res = (z) / 2; diff --git a/src/Algorithm/DataStructure/Snapshot.cpp b/src/Algorithm/DataStructure/Snapshot.cpp index 694b5e3e..d72658db 100644 --- a/src/Algorithm/DataStructure/Snapshot.cpp +++ b/src/Algorithm/DataStructure/Snapshot.cpp @@ -59,13 +59,13 @@ SESAME::SnapshotPtr SESAME::Snapshot::substractSnapshot(SnapshotPtr snapshotCurr for(unsigned int j=0; jmicroClusters[j]->id.size()>1) { if(snapshotCurrent->microClusters[i]->judgeMerge(snapshotLandmark->microClusters[j])) - snapshotCurrent->microClusters[i]->substractClusterVector(snapshotLandmark->microClusters[j]); + snapshotCurrent->microClusters[i]->subtractClusterVector(snapshotLandmark->microClusters[j]); } else { int clusterIdLandmark; clusterIdLandmark = snapshotLandmark->microClusters[j]->id[0]; if(std::find(snapshotCurrent->microClusters[i]->id.begin(), snapshotCurrent->microClusters[i]->id.end(), clusterIdLandmark)!=snapshotCurrent->microClusters[i]->id.end()) - snapshotCurrent->microClusters[i]->substractClusterVector(snapshotLandmark->microClusters[j]); + snapshotCurrent->microClusters[i]->subtractClusterVector(snapshotLandmark->microClusters[j]); } } } @@ -75,7 +75,7 @@ SESAME::SnapshotPtr SESAME::Snapshot::substractSnapshot(SnapshotPtr snapshotCurr if(snapshotLandmark->microClusters[j]->id.size()==1) { int clusterIdLandmark=snapshotLandmark->microClusters[j]->id[0]; if(snapshotCurrent->microClusters[i]->id[0]==clusterIdLandmark) - snapshotCurrent->microClusters[i]->substractClusterVector(snapshotLandmark->microClusters[j]); + snapshotCurrent->microClusters[i]->subtractClusterVector(snapshotLandmark->microClusters[j]); } } } diff --git a/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp new file mode 100644 index 00000000..b6529409 --- /dev/null +++ b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp @@ -0,0 +1,35 @@ +// +// Created by 1124a on 2021/8/30. +// +#include +SESAME::MicroClusterPair::MicroClusterPair( MicroClusterPtr microCluster1,MicroClusterPtr microCluster2){ + this->microCluster1=microCluster1->copy(); + this->microCluster2=microCluster2->copy(); +} + +bool SESAME::MicroClusterPair::equal(MicroClusterPairPtr other){ + bool equal=false; + if(other->microCluster1==this->microCluster1&&other->microCluster2==this->microCluster2) + equal=true; + if(other->microCluster1==this->microCluster2&&other->microCluster2==this->microCluster1) + equal=true; + return equal; +} +SESAME::AdjustedWeight::AdjustedWeight(double weight, clock_t pointTime){ + this->weight=weight; + this->updateTime=pointTime; +} +void SESAME::AdjustedWeight::add(clock_t startTime,double decayValue) { + if ( startTime == this->updateTime) { + weight++; + } + else { + weight *= decayValue + 1; + this->updateTime = clock(); + } +} + + +double SESAME::AdjustedWeight::getCurrentWeight(double decayFactor){ + return weight * decayFactor; +} diff --git a/src/Algorithm/DenStream.cpp b/src/Algorithm/DenStream.cpp index 37ec2f89..bd764a20 100644 --- a/src/Algorithm/DenStream.cpp +++ b/src/Algorithm/DenStream.cpp @@ -118,9 +118,9 @@ void SESAME::DenStream::runOnlineClustering(PointPtr input) { } } } - // SESAME_INFO("Insert Succeed "<pMicroClusters.empty()) { index=mergeToMicroCluster(dataPoint,this->pMicroClusters); - //SESAME_INFO("Merge into PMC! "<oMicroClusters.empty()) { index=mergeToMicroCluster(dataPoint,this->oMicroClusters); - // SESAME_INFO("Merge into OMC! "<=0) { double decayFactor= this->dampedWindow->decayFunction(this->oMicroClusters.at(index)->lastUpdateTime,pointArrivingTime); - //SESAME_INFO("Merge INTO OMC! "<oMicroClusters.at(index)->weight)*decayFactor>minWeight) { // SESAME_INFO("erase OMC and turn into PMC! "); @@ -156,7 +156,7 @@ void SESAME::DenStream::merge(PointPtr dataPoint){ MicroClusterPtr newOMicroCluster=DataStructureFactory::createMicroCluster(denStreamParams.dimension, oMicroClusterIndex); newOMicroCluster->init(dataPoint, 0); oMicroClusters.push_back(newOMicroCluster->copy()); - // SESAME_INFO("Create new OMC! "< microClusters){ From 656e35ca4b6401fa0edba803dabe6ff877792731 Mon Sep 17 00:00:00 2001 From: s1926539 Date: Tue, 31 Aug 2021 15:57:09 +0800 Subject: [PATCH 07/21] Finish Coding online part of DBStream --- include/Algorithm/DBStream.hpp | 6 +- .../Algorithm/DataStructure/MicroCluster.hpp | 1 + src/Algorithm/CluStream.cpp | 34 +++--- src/Algorithm/DBStream.cpp | 100 +++++++++++------- src/Algorithm/DataStructure/MicroCluster.cpp | 4 +- .../DataStructure/WeightedAdjacencyList.cpp | 2 + 6 files changed, 86 insertions(+), 61 deletions(-) diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp index 85f65211..815334ae 100644 --- a/include/Algorithm/DBStream.hpp +++ b/include/Algorithm/DBStream.hpp @@ -32,6 +32,7 @@ class DBStream : public Algorithm DampedWindowPtr dampedWindow; unordered_set microClusters; SESAME::WeightedAdjacencyList weightedAdjacencyList; + std::vector microClusterNN; double weakEntry;//W_weak, weak entries double aWeakEntry; clock_t startTime; @@ -45,11 +46,10 @@ class DBStream : public Algorithm void runOfflineClustering(DataSinkPtr sinkPtr) override; private: bool isInitial = false; - // vector initialBuffer; void update(PointPtr dataPoint); - void cleanUp(clock_t time); - bool checkMove( std::vector microClusters); + bool checkMove( std::vector microClusters) const; std::vector findFixedRadiusNN(PointPtr dataPoint, double decayFactor); + void cleanUp(clock_t nowTime); }; } diff --git a/include/Algorithm/DataStructure/MicroCluster.hpp b/include/Algorithm/DataStructure/MicroCluster.hpp index 2af59d63..15037343 100644 --- a/include/Algorithm/DataStructure/MicroCluster.hpp +++ b/include/Algorithm/DataStructure/MicroCluster.hpp @@ -72,6 +72,7 @@ class MicroCluster { double getDistance(PointPtr datapoint);//DBStream double getDistance(MicroClusterPtr other);//DBStream void move();//DBStream + void decayWeight(double decayFactor); SESAME::MicroClusterPtr copy(); private: diff --git a/src/Algorithm/CluStream.cpp b/src/Algorithm/CluStream.cpp index 2eeb0d37..72626fba 100644 --- a/src/Algorithm/CluStream.cpp +++ b/src/Algorithm/CluStream.cpp @@ -9,8 +9,8 @@ #include /** * @Description: "offline" init micro clusters using KMeans - * @param size: The size of initial data obects, - * initialData:input intial data + * @param size: The size of initial data objects, + * initialData:input initial data *@Return: void */ SESAME::CluStream::CluStream(param_t &cmd_params) { @@ -98,7 +98,7 @@ double SESAME::CluStream::calRadius(MicroClusterPtr closestCluster) { radius = doubleMax; dataPoint centroid = closestCluster->getCentroid(); for (int i = 0; i < this->CluStreamParam.clusterNumber; i++) { - // SESAME_INFO("i is for wegf"<id == closestCluster->id) { continue; } @@ -118,7 +118,7 @@ void SESAME::CluStream::insertIntoCluster(PointPtr data, MicroClusterPtr operate operateCluster->insert(data, timestamp); } -//Delete oldest cluster and create new one case +//Delete the oldest cluster and create new one case void SESAME::CluStream::deleteCreateCluster(PointPtr data) { // 3.1 Try to forget old micro clusters @@ -172,7 +172,7 @@ void SESAME::CluStream::microClusterToPoint(std::vector µC for (int i = 0; i < this->CluStreamParam.clusterNumber; i++) { PointPtr point = DataStructureFactory::createPoint(i, microClusters[i]->weight, microClusters[i]->centroid.size(), 0); - for (int j = 0; j < microClusters[i]->centroid.size(); j++) + for (SESAME::dataPoint::size_type j = 0; j < microClusters[i]->centroid.size(); j++) point->setFeatureItem(microClusters[i]->centroid[j], j); //points; points.push_back(point); @@ -255,17 +255,17 @@ void SESAME::CluStream::runOfflineClustering(SESAME::DataSinkPtr sinkPtr) { landmarkTime = 0; SESAME_INFO("Start offline..."); SESAME::SnapshotPtr landmarkSnapshot; - SESAME::SnapshotPtr substractMiroCluster; + SESAME::SnapshotPtr subtractMiroCluster; //If offlineTimeWindow ==0, Only Observe the end results of micro clusters - substractMiroCluster = + subtractMiroCluster = DataStructureFactory::createSnapshot(microClusters, (int) ((now - startTime) / CLOCKS_PER_SEC)); SESAME_INFO("Now Miro Cluster is..."); for (int i = 0; i < CluStreamParam.clusterNumber; i++) { std::stringstream result, re2; - std::copy(substractMiroCluster->microClusters[i]->id.begin(), - substractMiroCluster->microClusters[i]->id.end(), + std::copy(subtractMiroCluster->microClusters[i]->id.begin(), + subtractMiroCluster->microClusters[i]->id.end(), std::ostream_iterator(re2, " ")); - SESAME_INFO("The ID is " << re2.str() << "weight is " << substractMiroCluster->microClusters[i]->weight); + SESAME_INFO("The ID is " << re2.str() << "weight is " << subtractMiroCluster->microClusters[i]->weight); } //The offline is to observe a process of data stream clustering @@ -282,21 +282,21 @@ void SESAME::CluStream::runOfflineClustering(SESAME::DataSinkPtr sinkPtr) { SESAME_INFO("The ID is " << re2.str() << "weight is " << landmarkSnapshot->microClusters[i]->weight); } if (landmarkSnapshot->elapsedTime == -1) - landmarkSnapshot = substractMiroCluster; + landmarkSnapshot = subtractMiroCluster; - substractMiroCluster = SESAME::Snapshot::substractSnapshot(substractMiroCluster, landmarkSnapshot, + subtractMiroCluster = SESAME::Snapshot::substractSnapshot(subtractMiroCluster, landmarkSnapshot, this->CluStreamParam.clusterNumber); } - SESAME_INFO("substract Miro Cluster is..."); + SESAME_INFO("subtract Miro Cluster is..."); for (int i = 0; i < CluStreamParam.clusterNumber; i++) { std::stringstream result, re2; - std::copy(substractMiroCluster->microClusters[i]->id.begin(), - substractMiroCluster->microClusters[i]->id.end(), + std::copy(subtractMiroCluster->microClusters[i]->id.begin(), + subtractMiroCluster->microClusters[i]->id.end(), std::ostream_iterator(re2, " ")); - SESAME_INFO("The ID is " << re2.str() << "weight is " << substractMiroCluster->microClusters[i]->weight); + SESAME_INFO("The ID is " << re2.str() << "weight is " << subtractMiroCluster->microClusters[i]->weight); } vector TransformedSnapshot; - microClusterToPoint(substractMiroCluster->microClusters, TransformedSnapshot); + microClusterToPoint(subtractMiroCluster->microClusters, TransformedSnapshot); SESAME_INFO("offline Cluster Number " << this->CluStreamParam.offlineClusterNumber << "Total number of p: " << TransformedSnapshot.size()); diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp index 504ef7a9..fa2adead 100644 --- a/src/Algorithm/DBStream.cpp +++ b/src/Algorithm/DBStream.cpp @@ -5,6 +5,8 @@ #include #include +#pragma clang diagnostic push + SESAME::DBStream::DBStream(param_t &cmd_params){ this->dbStreamParams.radius=cmd_params.radius; this->dbStreamParams.lambda=cmd_params.lambda; @@ -18,40 +20,37 @@ SESAME::DBStream:: ~DBStream() void SESAME::DBStream::Initilize() { this->dampedWindow = WindowFactory::createDampedWindow(dbStreamParams.base, dbStreamParams.lambda); this->startTime = clock(); + this->pointArrivingTime= clock(); this->weakEntry= pow(dbStreamParams.base,(-1)*dbStreamParams.lambda*dbStreamParams.cleanUpInterval); this->aWeakEntry=weakEntry*dbStreamParams.alpha; - microClusters=unordered_set(); this->microClusterIndex=-1; } void SESAME::DBStream::runOnlineClustering(PointPtr input) { - input; + if (!this->isInitial) { Initilize(); this->isInitial = true; } else { - + if(input->getIndex()) + this->pointArrivingTime=clock(); + update(input); } } void SESAME::DBStream::runOfflineClustering(DataSinkPtr sinkPtr) { sinkPtr; } void SESAME::DBStream::update(PointPtr dataPoint){ - if(dataPoint->getIndex()==0) - { - this->pointArrivingTime=clock(); - } double decayFactor=dampedWindow->decayFunction(this->pointArrivingTime, clock()); this->pointArrivingTime=clock(); - std::vector microClusterNN=findFixedRadiusNN(dataPoint,decayFactor); - int sizeNN=microClusterNN.size(); - if (microClusterNN.size() < 1) { + this->microClusterNN=findFixedRadiusNN(dataPoint,decayFactor); + std::vector::size_type sizeNN=microClusterNN.size(); + if (microClusterNN.empty()) { microClusterIndex++; - MicroClusterPtr newMicroCluster=SESAME::DataStructureFactory::createMicroCluster(dbStreamParams.dimension, - microClusterIndex,dataPoint, - dbStreamParams.radius); + MicroClusterPtr newMicroCluster=SESAME::DataStructureFactory::createMicroCluster(dbStreamParams.dimension,microClusterIndex, + dataPoint,dbStreamParams.radius); microClusters.insert(newMicroCluster); microClusterNN.push_back(newMicroCluster); } @@ -64,8 +63,8 @@ void SESAME::DBStream::update(PointPtr dataPoint){ MicroClusterPairPtr microClusterPair =SESAME::DataStructureFactory::createMicroClusterPair(microCluster, microClusterNN.at(j)); if (weightedAdjacencyList.find(microClusterPair) != weightedAdjacencyList.end()) { - clock_t startTime= weightedAdjacencyList[microClusterPair]->updateTime; - double decayValue = dampedWindow->decayFunction(startTime,this->pointArrivingTime); + clock_t startT= weightedAdjacencyList[microClusterPair]->updateTime; + double decayValue = dampedWindow->decayFunction(startT,this->pointArrivingTime); weightedAdjacencyList[microClusterPair]->add(this->pointArrivingTime,decayValue); } else { AdjustedWeightPtr adjustedWeight = SESAME::DataStructureFactory::createAdjustedWeight(1,this->pointArrivingTime); @@ -73,41 +72,62 @@ void SESAME::DBStream::update(PointPtr dataPoint){ weightedAdjacencyList.insert(densityGraph); } } - } - if (checkMove(microClusterNN)) { - for (MicroClusterPtr microCluster : microClusterNN) { - microCluster->move(); - } - } - if (pointArrivingTime% dbStreamParams.cleanUpInterval == 0 && dataPoint->getIndex()!=0) { - cleanUp(pointArrivingTime); - } - // return microClusterNN; + if (checkMove(microClusterNN)) + for (const MicroClusterPtr& microCluster : microClusterNN) microCluster->move(); } + if (((pointArrivingTime-this->startTime)/CLOCKS_PER_SEC)% dbStreamParams.cleanUpInterval == 0 && dataPoint->getIndex()!=0) + cleanUp(this->pointArrivingTime); } -bool SESAME::DBStream::checkMove( std::vector microClusters){ - int size = microClusters.size(); - for (int i = 0; i < size; i++) { - for (int j = i + 1; j < size; j++) { - if (microClusters.at(i)->getDistance(microClusters.at(j)) < dbStreamParams.radius) { - return false; + +bool SESAME::DBStream::checkMove( std::vector microClustersList) const +{ + bool move=true; + if(!microClustersList.empty()) + { std::vector::size_type size = microClustersList.size(); + for (int i = 0; i < size; i++){ + for (int j = i + 1; j < size; j++){ + double distance=microClustersList.at(i)->getDistance(microClustersList.at(j)); + if (distance < dbStreamParams.radius) + move= false; } } } - return true; + return move; } -//TODO Stop here std::vector SESAME::DBStream::findFixedRadiusNN(PointPtr dataPoint, double decayFactor) { - std::vector microClusterList; - dataPoint; - decayFactor; - return microClusterList; - + std::vector result; + unordered_set::iterator iter; + for (iter= this->microClusters.begin();iter!= this->microClusters.end();iter++) { + (*iter)->decayWeight(decayFactor); + double distance = (*iter)->getDistance(dataPoint); + if (distance < dbStreamParams.radius) { + result.push_back(*iter); + } + } + return result; } -void SESAME::DBStream::cleanUp(clock_t time){ - time; +void SESAME::DBStream::cleanUp(clock_t nowTime){ + unordered_set removeMicroCluster; + unordered_set::iterator iter; + for (iter = microClusters.begin(); iter != microClusters.end(); iter++) { + if ((*iter)->weight <= this->weakEntry){ + removeMicroCluster.insert((*iter)->copy()); + microClusters.erase(iter); + } + } + WeightedAdjacencyList::iterator interW; + for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++){ + if (removeMicroCluster.find(interW->first->microCluster1) !=removeMicroCluster.end() + || removeMicroCluster.find(interW->first->microCluster2)!=removeMicroCluster.end()) + weightedAdjacencyList.erase(interW); + else{ + double decayFactor=dampedWindow->decayFunction(interW->second->updateTime,nowTime); + if (interW->second->getCurrentWeight(decayFactor) < aWeakEntry) + weightedAdjacencyList.erase(interW); + } + } } diff --git a/src/Algorithm/DataStructure/MicroCluster.cpp b/src/Algorithm/DataStructure/MicroCluster.cpp index 693eff23..60e0f6f4 100644 --- a/src/Algorithm/DataStructure/MicroCluster.cpp +++ b/src/Algorithm/DataStructure/MicroCluster.cpp @@ -299,7 +299,9 @@ void SESAME::MicroCluster::move(){ this->centroid=this->LS; } - +void SESAME::MicroCluster::decayWeight(double decayFactor){ + this->weight *=decayFactor; +} double SESAME::MicroCluster::inverseError(double x){ double z = sqrt(M_PI) * x; double res = (z) / 2; diff --git a/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp index b6529409..a5cdadbd 100644 --- a/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp +++ b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp @@ -33,3 +33,5 @@ void SESAME::AdjustedWeight::add(clock_t startTime,double decayValue) { double SESAME::AdjustedWeight::getCurrentWeight(double decayFactor){ return weight * decayFactor; } + + From a97bc970c46612224a352ff7db2b551a65ce956d Mon Sep 17 00:00:00 2001 From: s1926539 Date: Wed, 1 Sep 2021 13:00:49 +0800 Subject: [PATCH 08/21] Finish DBStream coding, still has problems in OfflineClustering, how to transfer microCluster into Point --- include/Algorithm/DBStream.hpp | 11 ++- src/Algorithm/DBStream.cpp | 73 ++++++++++++++++++++ src/Algorithm/DataStructure/MicroCluster.cpp | 4 +- 3 files changed, 86 insertions(+), 2 deletions(-) diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp index 815334ae..683def98 100644 --- a/include/Algorithm/DBStream.hpp +++ b/include/Algorithm/DBStream.hpp @@ -16,6 +16,7 @@ #include #include namespace SESAME { +typedef std::vector> Clusters; class DBStreamParams : public AlgorithmParameters { public: double radius; @@ -32,12 +33,14 @@ class DBStream : public Algorithm DampedWindowPtr dampedWindow; unordered_set microClusters; SESAME::WeightedAdjacencyList weightedAdjacencyList; - std::vector microClusterNN; + std::vector microClusterNN;//micro clusters found in function findFixedRadiusNN double weakEntry;//W_weak, weak entries double aWeakEntry; clock_t startTime; clock_t pointArrivingTime; int microClusterIndex; + //Final output of clusters + Clusters finalClusters; //TODO Need to implement weighted a weighted adjacency list S DBStream(param_t &cmd_params); ~DBStream(); @@ -50,6 +53,12 @@ class DBStream : public Algorithm bool checkMove( std::vector microClusters) const; std::vector findFixedRadiusNN(PointPtr dataPoint, double decayFactor); void cleanUp(clock_t nowTime); + void reCluster(double threshold); + static void insertIntoGraph(unordered_map> connectivityGraph, + MicroClusterPtr microCluster,MicroClusterPtr Other); + static void insertIntoGraph(unordered_map> connectivityGraph, + MicroClusterPtr microCluster); + void findConnectedComponents(unordered_map> connectivityGraph); }; } diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp index fa2adead..7713aafa 100644 --- a/src/Algorithm/DBStream.cpp +++ b/src/Algorithm/DBStream.cpp @@ -40,8 +40,13 @@ void SESAME::DBStream::Initilize() { } } void SESAME::DBStream::runOfflineClustering(DataSinkPtr sinkPtr) { + sinkPtr; + reCluster(dbStreamParams.alpha); } + + + void SESAME::DBStream::update(PointPtr dataPoint){ double decayFactor=dampedWindow->decayFunction(this->pointArrivingTime, clock()); this->pointArrivingTime=clock(); @@ -131,3 +136,71 @@ void SESAME::DBStream::cleanUp(clock_t nowTime){ } } +void SESAME::DBStream::reCluster(double threshold){ + unordered_map> connectivityGraph;//C in DBStream paper + WeightedAdjacencyList::iterator interW; + for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++){ + MicroClusterPtr microCluster1 =interW->first->microCluster1->copy(); + MicroClusterPtr microCluster2 =interW->first->microCluster2->copy(); + if (microCluster1->weight >= dbStreamParams.weightMin &µCluster2->weight >= dbStreamParams.weightMin){ + double val = 2*interW->second->weight / (microCluster1->weight+microCluster2->weight); + if (val > threshold) { + insertIntoGraph(connectivityGraph,microCluster1,microCluster2); + insertIntoGraph(connectivityGraph,microCluster2,microCluster1); + } + else + { + insertIntoGraph(connectivityGraph,microCluster1); + insertIntoGraph(connectivityGraph,microCluster2); + } + } + } + findConnectedComponents(connectivityGraph); + +} +void SESAME::DBStream::insertIntoGraph(unordered_map> connectivityGraph, + MicroClusterPtr microCluster,MicroClusterPtr Other){ + if (connectivityGraph.find(microCluster)!=connectivityGraph.end()) + connectivityGraph.find(microCluster)->second.insert(Other); + else{ + microCluster->visited = false; + unordered_set newMicroClusterSet; + newMicroClusterSet.insert(Other); + connectivityGraph.insert(std::make_pair(microCluster,newMicroClusterSet)); + } +} +void SESAME::DBStream::insertIntoGraph(unordered_map> connectivityGraph, + MicroClusterPtr microCluster){ + if (connectivityGraph.find(microCluster)==connectivityGraph.end()) + { + microCluster->visited = false; + unordered_set newMicroClusterSet; + connectivityGraph.insert(std::make_pair(microCluster,newMicroClusterSet)); + } +} + +void SESAME::DBStream::findConnectedComponents(unordered_map> connectivityGraph){ + unordered_map>::iterator inter; + for (inter = connectivityGraph.begin(); inter != connectivityGraph.end(); inter++){ + if (!inter->first->visited) { + std::vector newCluster, clusterGroup; + newCluster.push_back(inter->first); + while (!newCluster.empty()) { + //after found the front one, insert it into clusterGroup and delete from the original vector + MicroClusterPtr microCluster = newCluster.front(); + newCluster.erase(newCluster.begin()); + clusterGroup.push_back(microCluster); + microCluster->visited = true; + + for(const auto & interS : inter->second) + { + if (!interS->visited) + newCluster.push_back(interS); + } + } + this->finalClusters.push_back(clusterGroup); + } + } + +} diff --git a/src/Algorithm/DataStructure/MicroCluster.cpp b/src/Algorithm/DataStructure/MicroCluster.cpp index 60e0f6f4..8df58fa6 100644 --- a/src/Algorithm/DataStructure/MicroCluster.cpp +++ b/src/Algorithm/DataStructure/MicroCluster.cpp @@ -18,6 +18,7 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id) this->createTime=clock(); this->lastUpdateTime=this->createTime; radius=0; + visited=false; } SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,double radius){ this->dimension=dimension; @@ -25,7 +26,7 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,doub this->id.push_back(id); LST=0; SST=0; - this->visited=false; + this->visited=true; this->createTime=clock(); this->lastUpdateTime=this->createTime; this->radius=radius; @@ -34,6 +35,7 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,doub LS.push_back(data); centroid.push_back(data); } + } //Release memory of the current micro cluster From 69283c83d621e818ac7b94543096b4a4fccc698f Mon Sep 17 00:00:00 2001 From: s1926539 Date: Tue, 31 Aug 2021 00:56:10 +0800 Subject: [PATCH 09/21] Finish Coding half of DBStream --- include/Algorithm/DBStream.hpp | 15 +- .../Algorithm/DataStructure/MicroCluster.hpp | 1 - src/Algorithm/DBStream.cpp | 169 ++++-------------- src/Algorithm/DataStructure/MicroCluster.cpp | 8 +- .../DataStructure/WeightedAdjacencyList.cpp | 2 - 5 files changed, 43 insertions(+), 152 deletions(-) diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp index 683def98..85f65211 100644 --- a/include/Algorithm/DBStream.hpp +++ b/include/Algorithm/DBStream.hpp @@ -16,7 +16,6 @@ #include #include namespace SESAME { -typedef std::vector> Clusters; class DBStreamParams : public AlgorithmParameters { public: double radius; @@ -33,14 +32,11 @@ class DBStream : public Algorithm DampedWindowPtr dampedWindow; unordered_set microClusters; SESAME::WeightedAdjacencyList weightedAdjacencyList; - std::vector microClusterNN;//micro clusters found in function findFixedRadiusNN double weakEntry;//W_weak, weak entries double aWeakEntry; clock_t startTime; clock_t pointArrivingTime; int microClusterIndex; - //Final output of clusters - Clusters finalClusters; //TODO Need to implement weighted a weighted adjacency list S DBStream(param_t &cmd_params); ~DBStream(); @@ -49,16 +45,11 @@ class DBStream : public Algorithm void runOfflineClustering(DataSinkPtr sinkPtr) override; private: bool isInitial = false; + // vector initialBuffer; void update(PointPtr dataPoint); - bool checkMove( std::vector microClusters) const; + void cleanUp(clock_t time); + bool checkMove( std::vector microClusters); std::vector findFixedRadiusNN(PointPtr dataPoint, double decayFactor); - void cleanUp(clock_t nowTime); - void reCluster(double threshold); - static void insertIntoGraph(unordered_map> connectivityGraph, - MicroClusterPtr microCluster,MicroClusterPtr Other); - static void insertIntoGraph(unordered_map> connectivityGraph, - MicroClusterPtr microCluster); - void findConnectedComponents(unordered_map> connectivityGraph); }; } diff --git a/include/Algorithm/DataStructure/MicroCluster.hpp b/include/Algorithm/DataStructure/MicroCluster.hpp index 15037343..2af59d63 100644 --- a/include/Algorithm/DataStructure/MicroCluster.hpp +++ b/include/Algorithm/DataStructure/MicroCluster.hpp @@ -72,7 +72,6 @@ class MicroCluster { double getDistance(PointPtr datapoint);//DBStream double getDistance(MicroClusterPtr other);//DBStream void move();//DBStream - void decayWeight(double decayFactor); SESAME::MicroClusterPtr copy(); private: diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp index 7713aafa..504ef7a9 100644 --- a/src/Algorithm/DBStream.cpp +++ b/src/Algorithm/DBStream.cpp @@ -5,8 +5,6 @@ #include #include -#pragma clang diagnostic push - SESAME::DBStream::DBStream(param_t &cmd_params){ this->dbStreamParams.radius=cmd_params.radius; this->dbStreamParams.lambda=cmd_params.lambda; @@ -20,42 +18,40 @@ SESAME::DBStream:: ~DBStream() void SESAME::DBStream::Initilize() { this->dampedWindow = WindowFactory::createDampedWindow(dbStreamParams.base, dbStreamParams.lambda); this->startTime = clock(); - this->pointArrivingTime= clock(); this->weakEntry= pow(dbStreamParams.base,(-1)*dbStreamParams.lambda*dbStreamParams.cleanUpInterval); this->aWeakEntry=weakEntry*dbStreamParams.alpha; + microClusters=unordered_set(); this->microClusterIndex=-1; } void SESAME::DBStream::runOnlineClustering(PointPtr input) { - + input; if (!this->isInitial) { Initilize(); this->isInitial = true; } else { - if(input->getIndex()) - this->pointArrivingTime=clock(); - update(input); + } } void SESAME::DBStream::runOfflineClustering(DataSinkPtr sinkPtr) { - sinkPtr; - reCluster(dbStreamParams.alpha); } - - - void SESAME::DBStream::update(PointPtr dataPoint){ + if(dataPoint->getIndex()==0) + { + this->pointArrivingTime=clock(); + } double decayFactor=dampedWindow->decayFunction(this->pointArrivingTime, clock()); this->pointArrivingTime=clock(); - this->microClusterNN=findFixedRadiusNN(dataPoint,decayFactor); - std::vector::size_type sizeNN=microClusterNN.size(); - if (microClusterNN.empty()) { + std::vector microClusterNN=findFixedRadiusNN(dataPoint,decayFactor); + int sizeNN=microClusterNN.size(); + if (microClusterNN.size() < 1) { microClusterIndex++; - MicroClusterPtr newMicroCluster=SESAME::DataStructureFactory::createMicroCluster(dbStreamParams.dimension,microClusterIndex, - dataPoint,dbStreamParams.radius); + MicroClusterPtr newMicroCluster=SESAME::DataStructureFactory::createMicroCluster(dbStreamParams.dimension, + microClusterIndex,dataPoint, + dbStreamParams.radius); microClusters.insert(newMicroCluster); microClusterNN.push_back(newMicroCluster); } @@ -68,8 +64,8 @@ void SESAME::DBStream::update(PointPtr dataPoint){ MicroClusterPairPtr microClusterPair =SESAME::DataStructureFactory::createMicroClusterPair(microCluster, microClusterNN.at(j)); if (weightedAdjacencyList.find(microClusterPair) != weightedAdjacencyList.end()) { - clock_t startT= weightedAdjacencyList[microClusterPair]->updateTime; - double decayValue = dampedWindow->decayFunction(startT,this->pointArrivingTime); + clock_t startTime= weightedAdjacencyList[microClusterPair]->updateTime; + double decayValue = dampedWindow->decayFunction(startTime,this->pointArrivingTime); weightedAdjacencyList[microClusterPair]->add(this->pointArrivingTime,decayValue); } else { AdjustedWeightPtr adjustedWeight = SESAME::DataStructureFactory::createAdjustedWeight(1,this->pointArrivingTime); @@ -77,130 +73,41 @@ void SESAME::DBStream::update(PointPtr dataPoint){ weightedAdjacencyList.insert(densityGraph); } } - } - if (checkMove(microClusterNN)) - for (const MicroClusterPtr& microCluster : microClusterNN) microCluster->move(); } - if (((pointArrivingTime-this->startTime)/CLOCKS_PER_SEC)% dbStreamParams.cleanUpInterval == 0 && dataPoint->getIndex()!=0) - cleanUp(this->pointArrivingTime); -} - -bool SESAME::DBStream::checkMove( std::vector microClustersList) const -{ - bool move=true; - if(!microClustersList.empty()) - { std::vector::size_type size = microClustersList.size(); - for (int i = 0; i < size; i++){ - for (int j = i + 1; j < size; j++){ - double distance=microClustersList.at(i)->getDistance(microClustersList.at(j)); - if (distance < dbStreamParams.radius) - move= false; + if (checkMove(microClusterNN)) { + for (MicroClusterPtr microCluster : microClusterNN) { + microCluster->move(); } } - } - return move; -} + if (pointArrivingTime% dbStreamParams.cleanUpInterval == 0 && dataPoint->getIndex()!=0) { + cleanUp(pointArrivingTime); -std::vector SESAME::DBStream::findFixedRadiusNN(PointPtr dataPoint, double decayFactor) -{ - std::vector result; - unordered_set::iterator iter; - for (iter= this->microClusters.begin();iter!= this->microClusters.end();iter++) { - (*iter)->decayWeight(decayFactor); - double distance = (*iter)->getDistance(dataPoint); - if (distance < dbStreamParams.radius) { - result.push_back(*iter); } + // return microClusterNN; } - return result; } -void SESAME::DBStream::cleanUp(clock_t nowTime){ - unordered_set removeMicroCluster; - unordered_set::iterator iter; - for (iter = microClusters.begin(); iter != microClusters.end(); iter++) { - if ((*iter)->weight <= this->weakEntry){ - removeMicroCluster.insert((*iter)->copy()); - microClusters.erase(iter); - } - } - WeightedAdjacencyList::iterator interW; - for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++){ - if (removeMicroCluster.find(interW->first->microCluster1) !=removeMicroCluster.end() - || removeMicroCluster.find(interW->first->microCluster2)!=removeMicroCluster.end()) - weightedAdjacencyList.erase(interW); - else{ - double decayFactor=dampedWindow->decayFunction(interW->second->updateTime,nowTime); - if (interW->second->getCurrentWeight(decayFactor) < aWeakEntry) - weightedAdjacencyList.erase(interW); +bool SESAME::DBStream::checkMove( std::vector microClusters){ + int size = microClusters.size(); + for (int i = 0; i < size; i++) { + for (int j = i + 1; j < size; j++) { + if (microClusters.at(i)->getDistance(microClusters.at(j)) < dbStreamParams.radius) { + return false; + } } } + return true; } -void SESAME::DBStream::reCluster(double threshold){ - unordered_map> connectivityGraph;//C in DBStream paper - WeightedAdjacencyList::iterator interW; - for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++){ - MicroClusterPtr microCluster1 =interW->first->microCluster1->copy(); - MicroClusterPtr microCluster2 =interW->first->microCluster2->copy(); - if (microCluster1->weight >= dbStreamParams.weightMin &µCluster2->weight >= dbStreamParams.weightMin){ - double val = 2*interW->second->weight / (microCluster1->weight+microCluster2->weight); - if (val > threshold) { - insertIntoGraph(connectivityGraph,microCluster1,microCluster2); - insertIntoGraph(connectivityGraph,microCluster2,microCluster1); - } - else - { - insertIntoGraph(connectivityGraph,microCluster1); - insertIntoGraph(connectivityGraph,microCluster2); - } - } - } - findConnectedComponents(connectivityGraph); +//TODO Stop here +std::vector SESAME::DBStream::findFixedRadiusNN(PointPtr dataPoint, double decayFactor) +{ + std::vector microClusterList; + dataPoint; + decayFactor; + return microClusterList; } -void SESAME::DBStream::insertIntoGraph(unordered_map> connectivityGraph, - MicroClusterPtr microCluster,MicroClusterPtr Other){ - if (connectivityGraph.find(microCluster)!=connectivityGraph.end()) - connectivityGraph.find(microCluster)->second.insert(Other); - else{ - microCluster->visited = false; - unordered_set newMicroClusterSet; - newMicroClusterSet.insert(Other); - connectivityGraph.insert(std::make_pair(microCluster,newMicroClusterSet)); - } -} -void SESAME::DBStream::insertIntoGraph(unordered_map> connectivityGraph, - MicroClusterPtr microCluster){ - if (connectivityGraph.find(microCluster)==connectivityGraph.end()) - { - microCluster->visited = false; - unordered_set newMicroClusterSet; - connectivityGraph.insert(std::make_pair(microCluster,newMicroClusterSet)); - } +void SESAME::DBStream::cleanUp(clock_t time){ + time; } -void SESAME::DBStream::findConnectedComponents(unordered_map> connectivityGraph){ - unordered_map>::iterator inter; - for (inter = connectivityGraph.begin(); inter != connectivityGraph.end(); inter++){ - if (!inter->first->visited) { - std::vector newCluster, clusterGroup; - newCluster.push_back(inter->first); - while (!newCluster.empty()) { - //after found the front one, insert it into clusterGroup and delete from the original vector - MicroClusterPtr microCluster = newCluster.front(); - newCluster.erase(newCluster.begin()); - clusterGroup.push_back(microCluster); - microCluster->visited = true; - - for(const auto & interS : inter->second) - { - if (!interS->visited) - newCluster.push_back(interS); - } - } - this->finalClusters.push_back(clusterGroup); - } - } - -} diff --git a/src/Algorithm/DataStructure/MicroCluster.cpp b/src/Algorithm/DataStructure/MicroCluster.cpp index 8df58fa6..693eff23 100644 --- a/src/Algorithm/DataStructure/MicroCluster.cpp +++ b/src/Algorithm/DataStructure/MicroCluster.cpp @@ -18,7 +18,6 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id) this->createTime=clock(); this->lastUpdateTime=this->createTime; radius=0; - visited=false; } SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,double radius){ this->dimension=dimension; @@ -26,7 +25,7 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,doub this->id.push_back(id); LST=0; SST=0; - this->visited=true; + this->visited=false; this->createTime=clock(); this->lastUpdateTime=this->createTime; this->radius=radius; @@ -35,7 +34,6 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,doub LS.push_back(data); centroid.push_back(data); } - } //Release memory of the current micro cluster @@ -301,9 +299,7 @@ void SESAME::MicroCluster::move(){ this->centroid=this->LS; } -void SESAME::MicroCluster::decayWeight(double decayFactor){ - this->weight *=decayFactor; -} + double SESAME::MicroCluster::inverseError(double x){ double z = sqrt(M_PI) * x; double res = (z) / 2; diff --git a/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp index a5cdadbd..b6529409 100644 --- a/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp +++ b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp @@ -33,5 +33,3 @@ void SESAME::AdjustedWeight::add(clock_t startTime,double decayValue) { double SESAME::AdjustedWeight::getCurrentWeight(double decayFactor){ return weight * decayFactor; } - - From 015e611b91b9da2a8f8c7453649dc41baf531829 Mon Sep 17 00:00:00 2001 From: s1926539 Date: Tue, 31 Aug 2021 15:57:09 +0800 Subject: [PATCH 10/21] Finish Coding online part of DBStream --- include/Algorithm/DBStream.hpp | 6 +- .../Algorithm/DataStructure/MicroCluster.hpp | 1 + src/Algorithm/DBStream.cpp | 100 +++++++++++------- src/Algorithm/DataStructure/MicroCluster.cpp | 4 +- .../DataStructure/WeightedAdjacencyList.cpp | 2 + 5 files changed, 69 insertions(+), 44 deletions(-) diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp index 85f65211..815334ae 100644 --- a/include/Algorithm/DBStream.hpp +++ b/include/Algorithm/DBStream.hpp @@ -32,6 +32,7 @@ class DBStream : public Algorithm DampedWindowPtr dampedWindow; unordered_set microClusters; SESAME::WeightedAdjacencyList weightedAdjacencyList; + std::vector microClusterNN; double weakEntry;//W_weak, weak entries double aWeakEntry; clock_t startTime; @@ -45,11 +46,10 @@ class DBStream : public Algorithm void runOfflineClustering(DataSinkPtr sinkPtr) override; private: bool isInitial = false; - // vector initialBuffer; void update(PointPtr dataPoint); - void cleanUp(clock_t time); - bool checkMove( std::vector microClusters); + bool checkMove( std::vector microClusters) const; std::vector findFixedRadiusNN(PointPtr dataPoint, double decayFactor); + void cleanUp(clock_t nowTime); }; } diff --git a/include/Algorithm/DataStructure/MicroCluster.hpp b/include/Algorithm/DataStructure/MicroCluster.hpp index 2af59d63..15037343 100644 --- a/include/Algorithm/DataStructure/MicroCluster.hpp +++ b/include/Algorithm/DataStructure/MicroCluster.hpp @@ -72,6 +72,7 @@ class MicroCluster { double getDistance(PointPtr datapoint);//DBStream double getDistance(MicroClusterPtr other);//DBStream void move();//DBStream + void decayWeight(double decayFactor); SESAME::MicroClusterPtr copy(); private: diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp index 504ef7a9..fa2adead 100644 --- a/src/Algorithm/DBStream.cpp +++ b/src/Algorithm/DBStream.cpp @@ -5,6 +5,8 @@ #include #include +#pragma clang diagnostic push + SESAME::DBStream::DBStream(param_t &cmd_params){ this->dbStreamParams.radius=cmd_params.radius; this->dbStreamParams.lambda=cmd_params.lambda; @@ -18,40 +20,37 @@ SESAME::DBStream:: ~DBStream() void SESAME::DBStream::Initilize() { this->dampedWindow = WindowFactory::createDampedWindow(dbStreamParams.base, dbStreamParams.lambda); this->startTime = clock(); + this->pointArrivingTime= clock(); this->weakEntry= pow(dbStreamParams.base,(-1)*dbStreamParams.lambda*dbStreamParams.cleanUpInterval); this->aWeakEntry=weakEntry*dbStreamParams.alpha; - microClusters=unordered_set(); this->microClusterIndex=-1; } void SESAME::DBStream::runOnlineClustering(PointPtr input) { - input; + if (!this->isInitial) { Initilize(); this->isInitial = true; } else { - + if(input->getIndex()) + this->pointArrivingTime=clock(); + update(input); } } void SESAME::DBStream::runOfflineClustering(DataSinkPtr sinkPtr) { sinkPtr; } void SESAME::DBStream::update(PointPtr dataPoint){ - if(dataPoint->getIndex()==0) - { - this->pointArrivingTime=clock(); - } double decayFactor=dampedWindow->decayFunction(this->pointArrivingTime, clock()); this->pointArrivingTime=clock(); - std::vector microClusterNN=findFixedRadiusNN(dataPoint,decayFactor); - int sizeNN=microClusterNN.size(); - if (microClusterNN.size() < 1) { + this->microClusterNN=findFixedRadiusNN(dataPoint,decayFactor); + std::vector::size_type sizeNN=microClusterNN.size(); + if (microClusterNN.empty()) { microClusterIndex++; - MicroClusterPtr newMicroCluster=SESAME::DataStructureFactory::createMicroCluster(dbStreamParams.dimension, - microClusterIndex,dataPoint, - dbStreamParams.radius); + MicroClusterPtr newMicroCluster=SESAME::DataStructureFactory::createMicroCluster(dbStreamParams.dimension,microClusterIndex, + dataPoint,dbStreamParams.radius); microClusters.insert(newMicroCluster); microClusterNN.push_back(newMicroCluster); } @@ -64,8 +63,8 @@ void SESAME::DBStream::update(PointPtr dataPoint){ MicroClusterPairPtr microClusterPair =SESAME::DataStructureFactory::createMicroClusterPair(microCluster, microClusterNN.at(j)); if (weightedAdjacencyList.find(microClusterPair) != weightedAdjacencyList.end()) { - clock_t startTime= weightedAdjacencyList[microClusterPair]->updateTime; - double decayValue = dampedWindow->decayFunction(startTime,this->pointArrivingTime); + clock_t startT= weightedAdjacencyList[microClusterPair]->updateTime; + double decayValue = dampedWindow->decayFunction(startT,this->pointArrivingTime); weightedAdjacencyList[microClusterPair]->add(this->pointArrivingTime,decayValue); } else { AdjustedWeightPtr adjustedWeight = SESAME::DataStructureFactory::createAdjustedWeight(1,this->pointArrivingTime); @@ -73,41 +72,62 @@ void SESAME::DBStream::update(PointPtr dataPoint){ weightedAdjacencyList.insert(densityGraph); } } - } - if (checkMove(microClusterNN)) { - for (MicroClusterPtr microCluster : microClusterNN) { - microCluster->move(); - } - } - if (pointArrivingTime% dbStreamParams.cleanUpInterval == 0 && dataPoint->getIndex()!=0) { - cleanUp(pointArrivingTime); - } - // return microClusterNN; + if (checkMove(microClusterNN)) + for (const MicroClusterPtr& microCluster : microClusterNN) microCluster->move(); } + if (((pointArrivingTime-this->startTime)/CLOCKS_PER_SEC)% dbStreamParams.cleanUpInterval == 0 && dataPoint->getIndex()!=0) + cleanUp(this->pointArrivingTime); } -bool SESAME::DBStream::checkMove( std::vector microClusters){ - int size = microClusters.size(); - for (int i = 0; i < size; i++) { - for (int j = i + 1; j < size; j++) { - if (microClusters.at(i)->getDistance(microClusters.at(j)) < dbStreamParams.radius) { - return false; + +bool SESAME::DBStream::checkMove( std::vector microClustersList) const +{ + bool move=true; + if(!microClustersList.empty()) + { std::vector::size_type size = microClustersList.size(); + for (int i = 0; i < size; i++){ + for (int j = i + 1; j < size; j++){ + double distance=microClustersList.at(i)->getDistance(microClustersList.at(j)); + if (distance < dbStreamParams.radius) + move= false; } } } - return true; + return move; } -//TODO Stop here std::vector SESAME::DBStream::findFixedRadiusNN(PointPtr dataPoint, double decayFactor) { - std::vector microClusterList; - dataPoint; - decayFactor; - return microClusterList; - + std::vector result; + unordered_set::iterator iter; + for (iter= this->microClusters.begin();iter!= this->microClusters.end();iter++) { + (*iter)->decayWeight(decayFactor); + double distance = (*iter)->getDistance(dataPoint); + if (distance < dbStreamParams.radius) { + result.push_back(*iter); + } + } + return result; } -void SESAME::DBStream::cleanUp(clock_t time){ - time; +void SESAME::DBStream::cleanUp(clock_t nowTime){ + unordered_set removeMicroCluster; + unordered_set::iterator iter; + for (iter = microClusters.begin(); iter != microClusters.end(); iter++) { + if ((*iter)->weight <= this->weakEntry){ + removeMicroCluster.insert((*iter)->copy()); + microClusters.erase(iter); + } + } + WeightedAdjacencyList::iterator interW; + for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++){ + if (removeMicroCluster.find(interW->first->microCluster1) !=removeMicroCluster.end() + || removeMicroCluster.find(interW->first->microCluster2)!=removeMicroCluster.end()) + weightedAdjacencyList.erase(interW); + else{ + double decayFactor=dampedWindow->decayFunction(interW->second->updateTime,nowTime); + if (interW->second->getCurrentWeight(decayFactor) < aWeakEntry) + weightedAdjacencyList.erase(interW); + } + } } diff --git a/src/Algorithm/DataStructure/MicroCluster.cpp b/src/Algorithm/DataStructure/MicroCluster.cpp index 693eff23..60e0f6f4 100644 --- a/src/Algorithm/DataStructure/MicroCluster.cpp +++ b/src/Algorithm/DataStructure/MicroCluster.cpp @@ -299,7 +299,9 @@ void SESAME::MicroCluster::move(){ this->centroid=this->LS; } - +void SESAME::MicroCluster::decayWeight(double decayFactor){ + this->weight *=decayFactor; +} double SESAME::MicroCluster::inverseError(double x){ double z = sqrt(M_PI) * x; double res = (z) / 2; diff --git a/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp index b6529409..a5cdadbd 100644 --- a/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp +++ b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp @@ -33,3 +33,5 @@ void SESAME::AdjustedWeight::add(clock_t startTime,double decayValue) { double SESAME::AdjustedWeight::getCurrentWeight(double decayFactor){ return weight * decayFactor; } + + From 9db285a1d9d105e3aa5be8d5ca168819fa151511 Mon Sep 17 00:00:00 2001 From: s1926539 Date: Wed, 1 Sep 2021 13:00:49 +0800 Subject: [PATCH 11/21] Finish DBStream coding, still has problems in OfflineClustering, how to transfer microCluster into Point --- include/Algorithm/DBStream.hpp | 11 ++- src/Algorithm/DBStream.cpp | 73 ++++++++++++++++++++ src/Algorithm/DataStructure/MicroCluster.cpp | 4 +- 3 files changed, 86 insertions(+), 2 deletions(-) diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp index 815334ae..683def98 100644 --- a/include/Algorithm/DBStream.hpp +++ b/include/Algorithm/DBStream.hpp @@ -16,6 +16,7 @@ #include #include namespace SESAME { +typedef std::vector> Clusters; class DBStreamParams : public AlgorithmParameters { public: double radius; @@ -32,12 +33,14 @@ class DBStream : public Algorithm DampedWindowPtr dampedWindow; unordered_set microClusters; SESAME::WeightedAdjacencyList weightedAdjacencyList; - std::vector microClusterNN; + std::vector microClusterNN;//micro clusters found in function findFixedRadiusNN double weakEntry;//W_weak, weak entries double aWeakEntry; clock_t startTime; clock_t pointArrivingTime; int microClusterIndex; + //Final output of clusters + Clusters finalClusters; //TODO Need to implement weighted a weighted adjacency list S DBStream(param_t &cmd_params); ~DBStream(); @@ -50,6 +53,12 @@ class DBStream : public Algorithm bool checkMove( std::vector microClusters) const; std::vector findFixedRadiusNN(PointPtr dataPoint, double decayFactor); void cleanUp(clock_t nowTime); + void reCluster(double threshold); + static void insertIntoGraph(unordered_map> connectivityGraph, + MicroClusterPtr microCluster,MicroClusterPtr Other); + static void insertIntoGraph(unordered_map> connectivityGraph, + MicroClusterPtr microCluster); + void findConnectedComponents(unordered_map> connectivityGraph); }; } diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp index fa2adead..7713aafa 100644 --- a/src/Algorithm/DBStream.cpp +++ b/src/Algorithm/DBStream.cpp @@ -40,8 +40,13 @@ void SESAME::DBStream::Initilize() { } } void SESAME::DBStream::runOfflineClustering(DataSinkPtr sinkPtr) { + sinkPtr; + reCluster(dbStreamParams.alpha); } + + + void SESAME::DBStream::update(PointPtr dataPoint){ double decayFactor=dampedWindow->decayFunction(this->pointArrivingTime, clock()); this->pointArrivingTime=clock(); @@ -131,3 +136,71 @@ void SESAME::DBStream::cleanUp(clock_t nowTime){ } } +void SESAME::DBStream::reCluster(double threshold){ + unordered_map> connectivityGraph;//C in DBStream paper + WeightedAdjacencyList::iterator interW; + for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++){ + MicroClusterPtr microCluster1 =interW->first->microCluster1->copy(); + MicroClusterPtr microCluster2 =interW->first->microCluster2->copy(); + if (microCluster1->weight >= dbStreamParams.weightMin &µCluster2->weight >= dbStreamParams.weightMin){ + double val = 2*interW->second->weight / (microCluster1->weight+microCluster2->weight); + if (val > threshold) { + insertIntoGraph(connectivityGraph,microCluster1,microCluster2); + insertIntoGraph(connectivityGraph,microCluster2,microCluster1); + } + else + { + insertIntoGraph(connectivityGraph,microCluster1); + insertIntoGraph(connectivityGraph,microCluster2); + } + } + } + findConnectedComponents(connectivityGraph); + +} +void SESAME::DBStream::insertIntoGraph(unordered_map> connectivityGraph, + MicroClusterPtr microCluster,MicroClusterPtr Other){ + if (connectivityGraph.find(microCluster)!=connectivityGraph.end()) + connectivityGraph.find(microCluster)->second.insert(Other); + else{ + microCluster->visited = false; + unordered_set newMicroClusterSet; + newMicroClusterSet.insert(Other); + connectivityGraph.insert(std::make_pair(microCluster,newMicroClusterSet)); + } +} +void SESAME::DBStream::insertIntoGraph(unordered_map> connectivityGraph, + MicroClusterPtr microCluster){ + if (connectivityGraph.find(microCluster)==connectivityGraph.end()) + { + microCluster->visited = false; + unordered_set newMicroClusterSet; + connectivityGraph.insert(std::make_pair(microCluster,newMicroClusterSet)); + } +} + +void SESAME::DBStream::findConnectedComponents(unordered_map> connectivityGraph){ + unordered_map>::iterator inter; + for (inter = connectivityGraph.begin(); inter != connectivityGraph.end(); inter++){ + if (!inter->first->visited) { + std::vector newCluster, clusterGroup; + newCluster.push_back(inter->first); + while (!newCluster.empty()) { + //after found the front one, insert it into clusterGroup and delete from the original vector + MicroClusterPtr microCluster = newCluster.front(); + newCluster.erase(newCluster.begin()); + clusterGroup.push_back(microCluster); + microCluster->visited = true; + + for(const auto & interS : inter->second) + { + if (!interS->visited) + newCluster.push_back(interS); + } + } + this->finalClusters.push_back(clusterGroup); + } + } + +} diff --git a/src/Algorithm/DataStructure/MicroCluster.cpp b/src/Algorithm/DataStructure/MicroCluster.cpp index 60e0f6f4..8df58fa6 100644 --- a/src/Algorithm/DataStructure/MicroCluster.cpp +++ b/src/Algorithm/DataStructure/MicroCluster.cpp @@ -18,6 +18,7 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id) this->createTime=clock(); this->lastUpdateTime=this->createTime; radius=0; + visited=false; } SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,double radius){ this->dimension=dimension; @@ -25,7 +26,7 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,doub this->id.push_back(id); LST=0; SST=0; - this->visited=false; + this->visited=true; this->createTime=clock(); this->lastUpdateTime=this->createTime; this->radius=radius; @@ -34,6 +35,7 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,doub LS.push_back(data); centroid.push_back(data); } + } //Release memory of the current micro cluster From b46c6fcd3975b95e82f0aaae22b8df79973f2b82 Mon Sep 17 00:00:00 2001 From: Shuhao Zhang Date: Tue, 31 Aug 2021 00:56:10 +0800 Subject: [PATCH 12/21] Fix Time interval bugs in DenStream and CluStream --- include/Algorithm/CluStream.hpp | 1 + src/Algorithm/CluStream.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/Algorithm/CluStream.hpp b/include/Algorithm/CluStream.hpp index c006554b..fd45606f 100644 --- a/include/Algorithm/CluStream.hpp +++ b/include/Algorithm/CluStream.hpp @@ -44,6 +44,7 @@ class CluStream : public Algorithm { int pointsForgot; int pointsMerged; clock_t startTime; + clock_t lastUpdateTime; CluStream(param_t &cmd_params); ~CluStream(); diff --git a/src/Algorithm/CluStream.cpp b/src/Algorithm/CluStream.cpp index 72626fba..7b243dcc 100644 --- a/src/Algorithm/CluStream.cpp +++ b/src/Algorithm/CluStream.cpp @@ -200,6 +200,7 @@ void SESAME::CluStream::Initilize() { this->window = WindowFactory::createLandmarkWindow(); this->window->pyramidalWindow.timeInterval = this->CluStreamParam.timeInterval; this->startTime = clock(); + this->lastUpdateTime=this->startTime; window->initPyramidalWindow(this->window->pyramidalWindow.timeInterval); } @@ -233,13 +234,12 @@ void SESAME::CluStream::runOnlineClustering(SESAME::PointPtr input) { } } else { int interval; - clock_t lastTime = clock(); clock_t now = clock(); - interval = (int) ((now - lastTime) / CLOCKS_PER_SEC); - if (interval >= 1)// + interval = (int) ((now - lastUpdateTime) / CLOCKS_PER_SEC); + if (interval >= 1) { window->pyramidalWindowProcess(startTime, microClusters); - lastTime = now; + lastUpdateTime = now; } incrementalCluster(input); From 759d610426f50be42fd21da59d7c5ebb5e3e6b35 Mon Sep 17 00:00:00 2001 From: s1926539 Date: Fri, 17 Sep 2021 17:08:27 +0800 Subject: [PATCH 13/21] Fix Time interval bugs in DenStream and CluStream --- include/Algorithm/Algorithm.hpp | 2 +- src/Algorithm/AlgorithmFactory.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/Algorithm/Algorithm.hpp b/include/Algorithm/Algorithm.hpp index 7a2f7a8e..85f60152 100644 --- a/include/Algorithm/Algorithm.hpp +++ b/include/Algorithm/Algorithm.hpp @@ -14,7 +14,7 @@ using namespace std; namespace SESAME { -enum algoType { BirchType, StreamKMeansType, CluStreamType, DenStreamType }; +enum algoType { BirchType, StreamKMeansType, CluStreamType, DenStreamType, DBStreamType}; class Algorithm; typedef std::shared_ptr AlgorithmPtr; diff --git a/src/Algorithm/AlgorithmFactory.cpp b/src/Algorithm/AlgorithmFactory.cpp index e61c226f..89e11085 100644 --- a/src/Algorithm/AlgorithmFactory.cpp +++ b/src/Algorithm/AlgorithmFactory.cpp @@ -28,7 +28,7 @@ SESAME::AlgorithmPtr SESAME::AlgorithmFactory::create(param_t &cmd_params) { shared_ptr denStream = std::make_shared(cmd_params); return (SESAME::AlgorithmPtr) denStream; } - if (cmd_params.algoName == "DBStream") { + if (cmd_params.algoType == DBStreamType) { shared_ptr dbStream = std::make_shared(cmd_params); return (SESAME::AlgorithmPtr) dbStream; } From 2c0b0cd6a84a83fcf80d012625f2fc47a92d7d5d Mon Sep 17 00:00:00 2001 From: s1926539 Date: Sat, 18 Sep 2021 15:07:31 +0800 Subject: [PATCH 14/21] modify Micro cluster insert data function and debug online part --- include/Algorithm/DBStream.hpp | 5 +- .../Algorithm/DataStructure/MicroCluster.hpp | 2 +- src/Algorithm/DBStream.cpp | 107 ++++++++++++------ src/Algorithm/DataStructure/MicroCluster.cpp | 23 +++- 4 files changed, 96 insertions(+), 41 deletions(-) diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp index 683def98..9d9d8201 100644 --- a/include/Algorithm/DBStream.hpp +++ b/include/Algorithm/DBStream.hpp @@ -31,13 +31,14 @@ class DBStream : public Algorithm public: DBStreamParams dbStreamParams; DampedWindowPtr dampedWindow; - unordered_set microClusters; + std::vector microClusters; SESAME::WeightedAdjacencyList weightedAdjacencyList; std::vector microClusterNN;//micro clusters found in function findFixedRadiusNN double weakEntry;//W_weak, weak entries double aWeakEntry; clock_t startTime; clock_t pointArrivingTime; + clock_t lastCleanTime; int microClusterIndex; //Final output of clusters Clusters finalClusters; @@ -51,7 +52,7 @@ class DBStream : public Algorithm bool isInitial = false; void update(PointPtr dataPoint); bool checkMove( std::vector microClusters) const; - std::vector findFixedRadiusNN(PointPtr dataPoint, double decayFactor); + std::vector findFixedRadiusNN(PointPtr dataPoint); void cleanUp(clock_t nowTime); void reCluster(double threshold); static void insertIntoGraph(unordered_map> connectivityGraph, diff --git a/include/Algorithm/DataStructure/MicroCluster.hpp b/include/Algorithm/DataStructure/MicroCluster.hpp index 15037343..6e6e8160 100644 --- a/include/Algorithm/DataStructure/MicroCluster.hpp +++ b/include/Algorithm/DataStructure/MicroCluster.hpp @@ -50,7 +50,7 @@ class MicroCluster { void init(PointPtr datapoint, int timestamp); void insert(PointPtr datapoint, int timestamp);//Used in CluStream bool insert(PointPtr datapoint,double decayFactor,double epsilon);// DenStream - void insert(PointPtr datapoint);//DBStream + void insert(PointPtr datapoint, double decayFactor);//DBStream void merge(MicroClusterPtr other); void subtractClusterVector(MicroClusterPtr other); void updateId(MicroClusterPtr other); diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp index 7713aafa..95f99317 100644 --- a/src/Algorithm/DBStream.cpp +++ b/src/Algorithm/DBStream.cpp @@ -5,8 +5,17 @@ #include #include -#pragma clang diagnostic push - +/** + * @Description: initialize user defined parameters, + * @Param: + * radius: radius of micro clusters + * lambda: lambda in decay function + * cleanUpInterval: time gap of clean up + * weightMin: the minimum weight of micro cluster to identify noise MCs + * alpha: intersection factor + * base: decay function base -- Normally 2 + * @Return: void + */ SESAME::DBStream::DBStream(param_t &cmd_params){ this->dbStreamParams.radius=cmd_params.radius; this->dbStreamParams.lambda=cmd_params.lambda; @@ -17,15 +26,26 @@ SESAME::DBStream::DBStream(param_t &cmd_params){ } SESAME::DBStream:: ~DBStream() = default; + +/** + * @Description: initialization of the algorithm, + * @Param: void + * @Return: void + */ void SESAME::DBStream::Initilize() { this->dampedWindow = WindowFactory::createDampedWindow(dbStreamParams.base, dbStreamParams.lambda); this->startTime = clock(); this->pointArrivingTime= clock(); + this->lastCleanTime=clock(); this->weakEntry= pow(dbStreamParams.base,(-1)*dbStreamParams.lambda*dbStreamParams.cleanUpInterval); this->aWeakEntry=weakEntry*dbStreamParams.alpha; this->microClusterIndex=-1; } - + /** + * @Description: online clustering stage, input data point incrementally and update the MC list and weight adjacency lists, + * @Param: void + * @Return: void + */ void SESAME::DBStream::runOnlineClustering(PointPtr input) { if (!this->isInitial) { @@ -45,51 +65,84 @@ void SESAME::DBStream::runOfflineClustering(DataSinkPtr sinkPtr) { reCluster(dbStreamParams.alpha); } - +/** + * @Description: Insert data point into existing MCs, + * first find the MCs which data point locates in, if finding no MCs, + * create new MC with this data point, else if finding MCs can accept this data + * update these MCs and the corresponding Sij in Weighted adjacency list S; + * After inserting, we check whether moving center of MCs will collapse, + * if it will, we roll back moving center actions, + * finally, we clean up the MCs which is less than Wmin + * @Param: data point + * @Return: void + */ void SESAME::DBStream::update(PointPtr dataPoint){ double decayFactor=dampedWindow->decayFunction(this->pointArrivingTime, clock()); this->pointArrivingTime=clock(); - this->microClusterNN=findFixedRadiusNN(dataPoint,decayFactor); + this->microClusterNN=findFixedRadiusNN(dataPoint); std::vector::size_type sizeNN=microClusterNN.size(); if (microClusterNN.empty()) { microClusterIndex++; MicroClusterPtr newMicroCluster=SESAME::DataStructureFactory::createMicroCluster(dbStreamParams.dimension,microClusterIndex, dataPoint,dbStreamParams.radius); - microClusters.insert(newMicroCluster); + microClusters.push_back(newMicroCluster); microClusterNN.push_back(newMicroCluster); } else { for (int i = 0; i < sizeNN; i++) { MicroClusterPtr microCluster = microClusterNN.at(i); - microCluster->insert(dataPoint); // just update weight + microCluster->insert(dataPoint,decayFactor); // just update weight // update shared density for (int j = i + 1; j < sizeNN; j++) { MicroClusterPairPtr microClusterPair =SESAME::DataStructureFactory::createMicroClusterPair(microCluster, microClusterNN.at(j)); - if (weightedAdjacencyList.find(microClusterPair) != weightedAdjacencyList.end()) { + if (weightedAdjacencyList.find(microClusterPair) != weightedAdjacencyList.end()) + { clock_t startT= weightedAdjacencyList[microClusterPair]->updateTime; double decayValue = dampedWindow->decayFunction(startT,this->pointArrivingTime); weightedAdjacencyList[microClusterPair]->add(this->pointArrivingTime,decayValue); - } else { + } + else + { AdjustedWeightPtr adjustedWeight = SESAME::DataStructureFactory::createAdjustedWeight(1,this->pointArrivingTime); DensityGraph densityGraph(microClusterPair,adjustedWeight); weightedAdjacencyList.insert(densityGraph); } + } } if (checkMove(microClusterNN)) for (const MicroClusterPtr& microCluster : microClusterNN) microCluster->move(); } - if (((pointArrivingTime-this->startTime)/CLOCKS_PER_SEC)% dbStreamParams.cleanUpInterval == 0 && dataPoint->getIndex()!=0) - cleanUp(this->pointArrivingTime); + if (((pointArrivingTime-this->lastCleanTime)/CLOCKS_PER_SEC)>= dbStreamParams.cleanUpInterval && dataPoint->getIndex()!=0) + { + cleanUp(this->pointArrivingTime); + this->lastCleanTime=this->pointArrivingTime; + } +} + + +std::vector SESAME::DBStream::findFixedRadiusNN(PointPtr dataPoint) +{ + std::vector result; + std::vector::size_type iter; + for (iter= 0;iter< microClusters.size();iter++) { + //microClusters.at(iter)>decayWeight(decayFactor); //add this line into Micro Cluster insert data functions + double distance = microClusters.at(iter)->getDistance(dataPoint); + if (distance < dbStreamParams.radius) + result.push_back(microClusters.at(iter)); + } + return result; } + bool SESAME::DBStream::checkMove( std::vector microClustersList) const { bool move=true; if(!microClustersList.empty()) - { std::vector::size_type size = microClustersList.size(); + { + std::vector::size_type size = microClustersList.size(); for (int i = 0; i < size; i++){ for (int j = i + 1; j < size; j++){ double distance=microClustersList.at(i)->getDistance(microClustersList.at(j)); @@ -101,32 +154,20 @@ bool SESAME::DBStream::checkMove( std::vector microClustersList return move; } -std::vector SESAME::DBStream::findFixedRadiusNN(PointPtr dataPoint, double decayFactor) -{ - std::vector result; - unordered_set::iterator iter; - for (iter= this->microClusters.begin();iter!= this->microClusters.end();iter++) { - (*iter)->decayWeight(decayFactor); - double distance = (*iter)->getDistance(dataPoint); - if (distance < dbStreamParams.radius) { - result.push_back(*iter); - } - } - return result; -} + void SESAME::DBStream::cleanUp(clock_t nowTime){ - unordered_set removeMicroCluster; - unordered_set::iterator iter; - for (iter = microClusters.begin(); iter != microClusters.end(); iter++) { - if ((*iter)->weight <= this->weakEntry){ - removeMicroCluster.insert((*iter)->copy()); - microClusters.erase(iter); + std::vector removeMicroCluster; + std::vector::size_type iter; + for (iter=0;iterweight <= this->weakEntry){ + removeMicroCluster.push_back(microClusters.at(iter)->copy()); + microClusters.erase(microClusters.begin()+iter);//Delete this MC from current MC list } } WeightedAdjacencyList::iterator interW; for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++){ - if (removeMicroCluster.find(interW->first->microCluster1) !=removeMicroCluster.end() - || removeMicroCluster.find(interW->first->microCluster2)!=removeMicroCluster.end()) + if (std::find(removeMicroCluster.begin(),removeMicroCluster.end(),interW->first->microCluster1) !=removeMicroCluster.end() + || std::find(removeMicroCluster.begin(),removeMicroCluster.end(),interW->first->microCluster2)!=removeMicroCluster.end()) weightedAdjacencyList.erase(interW); else{ double decayFactor=dampedWindow->decayFunction(interW->second->updateTime,nowTime); diff --git a/src/Algorithm/DataStructure/MicroCluster.cpp b/src/Algorithm/DataStructure/MicroCluster.cpp index 8df58fa6..1b43ac76 100644 --- a/src/Algorithm/DataStructure/MicroCluster.cpp +++ b/src/Algorithm/DataStructure/MicroCluster.cpp @@ -6,7 +6,7 @@ #include #include #include - +//Create MC, only initialization, used for DenStream, CluStream SESAME::MicroCluster::MicroCluster(int dimension, int id) { this->dimension=dimension; @@ -20,6 +20,7 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id) radius=0; visited=false; } +//Create MC, only initialization, only used for DBStream as it has user-defined fixed radius SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,double radius){ this->dimension=dimension; weight=1; @@ -46,6 +47,8 @@ SESAME::MicroCluster::~MicroCluster() std::vector ().swap(LS); std::vector ().swap(SS); } + +//Used in DenStream, DBStream void SESAME::MicroCluster::init(PointPtr datapoint,int timestamp) { weight++; @@ -59,7 +62,7 @@ void SESAME::MicroCluster::init(PointPtr datapoint,int timestamp) SST+=timestamp*timestamp; } - +//Used in DenStream, DBStream //insert a new data point from input data stream void SESAME::MicroCluster::insert(PointPtr datapoint,int timestamp) { @@ -74,19 +77,26 @@ void SESAME::MicroCluster::insert(PointPtr datapoint,int timestamp) SST+=timestamp*timestamp; centroid=std::move(getCentroid()); } -void SESAME::MicroCluster::insert(PointPtr datapoint) + +//Used only in DBStream +void SESAME::MicroCluster::insert(PointPtr datapoint,double decayFactor) { + decayWeight(decayFactor); weight++; double val = exp(-(pow(3 * this->distance / radius, 2) / 2)); - for(int i=0; igetFeatureItem(i); LS[i] = centroid.at(i) + val * (data - centroid.at(i)); } + lastUpdateTime=clock(); } + double SESAME::MicroCluster::getDistance(PointPtr datapoint){ this->distance=calCentroidDistance(datapoint); return this->distance; } +//Often Used only in DBStream TODO this just a note, need to delete or detailed explain later double SESAME::MicroCluster::getDistance(MicroClusterPtr other){ double temp = 0, dist = 0; for (int i = 0; i < this->dimension; i++) { @@ -95,7 +105,7 @@ double SESAME::MicroCluster::getDistance(MicroClusterPtr other){ } return sqrt(dist); } - +//Used in DenStream bool SESAME::MicroCluster::insert(PointPtr datapoint,double decayFactor,double epsilon){ bool result; dataPoint LSPre; LSPre.assign(this->LS.begin(),this->LS.end()); @@ -123,6 +133,9 @@ bool SESAME::MicroCluster::insert(PointPtr datapoint,double decayFactor,double e result=false; return result; } + + + //merge two micro-clusters void SESAME::MicroCluster::merge(MicroClusterPtr other){ weight+=other->weight; From e5f11dd43b277224d77116fd4f91caca26306c32 Mon Sep 17 00:00:00 2001 From: s1926539 Date: Sun, 19 Sep 2021 00:43:16 +0800 Subject: [PATCH 15/21] modify Micro cluster insert data function and debug online part --- include/Algorithm/DBStream.hpp | 6 +- src/Algorithm/CluStream.cpp | 2 +- src/Algorithm/DBStream.cpp | 125 ++++++++++++++----- src/Algorithm/DataStructure/MicroCluster.cpp | 7 +- test/CMakeLists.txt | 9 +- test/SystemTest/DBStreamTest.cpp | 49 ++++++++ 6 files changed, 155 insertions(+), 43 deletions(-) create mode 100644 test/SystemTest/DBStreamTest.cpp diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp index 9d9d8201..0464e5af 100644 --- a/include/Algorithm/DBStream.hpp +++ b/include/Algorithm/DBStream.hpp @@ -55,11 +55,11 @@ class DBStream : public Algorithm std::vector findFixedRadiusNN(PointPtr dataPoint); void cleanUp(clock_t nowTime); void reCluster(double threshold); - static void insertIntoGraph(unordered_map> connectivityGraph, + static void insertIntoGraph(unordered_map> connectivityGraph, MicroClusterPtr microCluster,MicroClusterPtr Other); - static void insertIntoGraph(unordered_map> connectivityGraph, + static void insertIntoGraph(unordered_map> connectivityGraph, MicroClusterPtr microCluster); - void findConnectedComponents(unordered_map> connectivityGraph); + void findConnectedComponents(unordered_map> connectivityGraph); }; } diff --git a/src/Algorithm/CluStream.cpp b/src/Algorithm/CluStream.cpp index 7b243dcc..53c16226 100644 --- a/src/Algorithm/CluStream.cpp +++ b/src/Algorithm/CluStream.cpp @@ -289,7 +289,7 @@ void SESAME::CluStream::runOfflineClustering(SESAME::DataSinkPtr sinkPtr) { } SESAME_INFO("subtract Miro Cluster is..."); for (int i = 0; i < CluStreamParam.clusterNumber; i++) { - std::stringstream result, re2; + std::stringstream re2; std::copy(subtractMiroCluster->microClusters[i]->id.begin(), subtractMiroCluster->microClusters[i]->id.end(), std::ostream_iterator(re2, " ")); diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp index 95f99317..39924944 100644 --- a/src/Algorithm/DBStream.cpp +++ b/src/Algorithm/DBStream.cpp @@ -1,5 +1,5 @@ // -// Created by 1124a on 2021/8/30. +// Created by Zhenyu on 2021/8/30. // #include #include @@ -17,6 +17,8 @@ * @Return: void */ SESAME::DBStream::DBStream(param_t &cmd_params){ + this->dbStreamParams.pointNumber = cmd_params.pointNumber; + this->dbStreamParams.dimension = cmd_params.dimension; this->dbStreamParams.radius=cmd_params.radius; this->dbStreamParams.lambda=cmd_params.lambda; this->dbStreamParams.cleanUpInterval=cmd_params.cleanUpInterval; @@ -47,20 +49,21 @@ void SESAME::DBStream::Initilize() { * @Return: void */ void SESAME::DBStream::runOnlineClustering(PointPtr input) { - if (!this->isInitial) { + SESAME_INFO("Start initialize..."); Initilize(); this->isInitial = true; + } else { if(input->getIndex()) - this->pointArrivingTime=clock(); + this->pointArrivingTime=clock(); update(input); } } void SESAME::DBStream::runOfflineClustering(DataSinkPtr sinkPtr) { - + SESAME_INFO("Start offline..."); sinkPtr; reCluster(dbStreamParams.alpha); } @@ -79,35 +82,48 @@ void SESAME::DBStream::runOfflineClustering(DataSinkPtr sinkPtr) { void SESAME::DBStream::update(PointPtr dataPoint){ double decayFactor=dampedWindow->decayFunction(this->pointArrivingTime, clock()); + // SESAME_INFO("decayFactor is..."<pointArrivingTime=clock(); this->microClusterNN=findFixedRadiusNN(dataPoint); std::vector::size_type sizeNN=microClusterNN.size(); + SESAME_INFO("find suitable MCs number : "<id.front()); MicroClusterPtr microCluster = microClusterNN.at(i); microCluster->insert(dataPoint,decayFactor); // just update weight // update shared density for (int j = i + 1; j < sizeNN; j++) { MicroClusterPairPtr microClusterPair =SESAME::DataStructureFactory::createMicroClusterPair(microCluster, microClusterNN.at(j)); - if (weightedAdjacencyList.find(microClusterPair) != weightedAdjacencyList.end()) + MicroClusterPairPtr microClusterPair2 =SESAME::DataStructureFactory::createMicroClusterPair(microClusterNN.at(j),microCluster); + SESAME_INFO("pair "<< microClusterNN.at(i)->id.front()<<" "<< microClusterNN.at(j)->id.front()); + if (weightedAdjacencyList.find(microClusterPair) != weightedAdjacencyList.end()|| + weightedAdjacencyList.find(microClusterPair2) != weightedAdjacencyList.end()) { + SESAME_INFO("find microClusterPair!"<id.front()<<", "<id.front()); clock_t startT= weightedAdjacencyList[microClusterPair]->updateTime; double decayValue = dampedWindow->decayFunction(startT,this->pointArrivingTime); weightedAdjacencyList[microClusterPair]->add(this->pointArrivingTime,decayValue); + //SESAME_INFO("pair weight = "<weight ); } else { + //SESAME_INFO("Create microClusterPair!"); AdjustedWeightPtr adjustedWeight = SESAME::DataStructureFactory::createAdjustedWeight(1,this->pointArrivingTime); DensityGraph densityGraph(microClusterPair,adjustedWeight); weightedAdjacencyList.insert(densityGraph); + // SESAME_INFO("pair weight = "<weight ); } } @@ -117,6 +133,7 @@ void SESAME::DBStream::update(PointPtr dataPoint){ } if (((pointArrivingTime-this->lastCleanTime)/CLOCKS_PER_SEC)>= dbStreamParams.cleanUpInterval && dataPoint->getIndex()!=0) { + cleanUp(this->pointArrivingTime); this->lastCleanTime=this->pointArrivingTime; } @@ -130,6 +147,7 @@ std::vector SESAME::DBStream::findFixedRadiusNN(PointPt for (iter= 0;iter< microClusters.size();iter++) { //microClusters.at(iter)>decayWeight(decayFactor); //add this line into Micro Cluster insert data functions double distance = microClusters.at(iter)->getDistance(dataPoint); + // SESAME_INFO("distance is "< microClustersList bool move=true; if(!microClustersList.empty()) { - std::vector::size_type size = microClustersList.size(); - for (int i = 0; i < size; i++){ - for (int j = i + 1; j < size; j++){ + std::vector::size_type i ,j ; + for ( i = 0; i < microClustersList.size(); i++){ + for (j = i + 1; j < microClustersList.size(); j++){ double distance=microClustersList.at(i)->getDistance(microClustersList.at(j)); if (distance < dbStreamParams.radius) move= false; } } } + else + move=false; return move; } @@ -158,27 +178,40 @@ bool SESAME::DBStream::checkMove( std::vector microClustersList void SESAME::DBStream::cleanUp(clock_t nowTime){ std::vector removeMicroCluster; std::vector::size_type iter; - for (iter=0;iterweight <= this->weakEntry){ + //Check the current micro Clusters whether they have weak MCs + for (iter=0;iterweight <= this->weakEntry) + { removeMicroCluster.push_back(microClusters.at(iter)->copy()); - microClusters.erase(microClusters.begin()+iter);//Delete this MC from current MC list + microClusters.erase(microClusters.begin()+int(iter));//Delete this MC from current MC list } } + SESAME_INFO("CLEAN MCs Already."); WeightedAdjacencyList::iterator interW; - for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++){ + //Check the current shared density graph whether they have weak entries + if(!weightedAdjacencyList.empty()) + { + for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++) + { if (std::find(removeMicroCluster.begin(),removeMicroCluster.end(),interW->first->microCluster1) !=removeMicroCluster.end() || std::find(removeMicroCluster.begin(),removeMicroCluster.end(),interW->first->microCluster2)!=removeMicroCluster.end()) - weightedAdjacencyList.erase(interW); + { + weightedAdjacencyList.erase(interW); SESAME_INFO("CLEAN existing entries.");} else{ + double decayFactor=dampedWindow->decayFunction(interW->second->updateTime,nowTime); if (interW->second->getCurrentWeight(decayFactor) < aWeakEntry) weightedAdjacencyList.erase(interW); } } + + } + } void SESAME::DBStream::reCluster(double threshold){ - unordered_map> connectivityGraph;//C in DBStream paper + unordered_map> connectivityGraph;//Connectivity graph C in DBStream paper WeightedAdjacencyList::iterator interW; for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++){ MicroClusterPtr microCluster1 =interW->first->microCluster1->copy(); @@ -199,49 +232,75 @@ void SESAME::DBStream::reCluster(double threshold){ findConnectedComponents(connectivityGraph); } -void SESAME::DBStream::insertIntoGraph(unordered_map> connectivityGraph, +/** + * @Description: insert vertices and entries into connectivity graph when micro cluster pair + * connectivity value greater than the intersection threshold + * if the graph has testing micro cluster, add connected strong MC in the corresponding entries + * else, create new V,E into the graph + * @Param: connectivity graph, micro cluster 1 and 2 + * @Return: void + */ + +void SESAME::DBStream::insertIntoGraph(unordered_map> connectivityGraph, MicroClusterPtr microCluster,MicroClusterPtr Other){ if (connectivityGraph.find(microCluster)!=connectivityGraph.end()) - connectivityGraph.find(microCluster)->second.insert(Other); + connectivityGraph.find(microCluster)->second.push_back(Other); else{ microCluster->visited = false; - unordered_set newMicroClusterSet; - newMicroClusterSet.insert(Other); + std::vector newMicroClusterSet; + newMicroClusterSet.push_back(Other); connectivityGraph.insert(std::make_pair(microCluster,newMicroClusterSet)); } } -void SESAME::DBStream::insertIntoGraph(unordered_map> connectivityGraph, + + +void SESAME::DBStream::insertIntoGraph(unordered_map> connectivityGraph, MicroClusterPtr microCluster){ if (connectivityGraph.find(microCluster)==connectivityGraph.end()) { microCluster->visited = false; - unordered_set newMicroClusterSet; + std::vector newMicroClusterSet; connectivityGraph.insert(std::make_pair(microCluster,newMicroClusterSet)); } } +/** + * @Description: findConnectedComponents function visit the existing connectivity graph + * and find all connected strong MCs that will finally form arbitrary-shaped macro clusters + * each macro cluster will be stored as a vector of micro clusters, which will be transformed into + * point that stores in sink later + * @Param: connectivity graph + * @Return: void + */ void SESAME::DBStream::findConnectedComponents(unordered_map> connectivityGraph){ - unordered_map>::iterator inter; - for (inter = connectivityGraph.begin(); inter != connectivityGraph.end(); inter++){ - if (!inter->first->visited) { + std::vector> connectivityGraph){ + unordered_map>::iterator iter; + //This variable just for indicating the id of micro cluster which forming macro clusters + std::vector idList; + for (iter = connectivityGraph.begin(); iter != connectivityGraph.end(); iter++){ + if (!iter->first->visited) { std::vector newCluster, clusterGroup; - newCluster.push_back(inter->first); - while (!newCluster.empty()) { + newCluster.push_back(iter->first); + for(const auto & iterS : iter->second) + { + if (!iterS->visited) + newCluster.push_back(iterS); + } + SESAME_INFO("New formed macro cluster ... including micro cluster :"); + while (!newCluster.empty()) + { //after found the front one, insert it into clusterGroup and delete from the original vector MicroClusterPtr microCluster = newCluster.front(); newCluster.erase(newCluster.begin()); clusterGroup.push_back(microCluster); + idList.push_back(microCluster->id.front()); microCluster->visited = true; - - for(const auto & interS : inter->second) - { - if (!interS->visited) - newCluster.push_back(interS); - } } this->finalClusters.push_back(clusterGroup); + //just used for examine reform ,need to delete later + std::stringstream result; + std::copy(idList.begin(),idList.end(),std::ostream_iterator(result, " ")); + SESAME_INFO(" " << result.str() ); } } - } diff --git a/src/Algorithm/DataStructure/MicroCluster.cpp b/src/Algorithm/DataStructure/MicroCluster.cpp index 1b43ac76..6f7a436c 100644 --- a/src/Algorithm/DataStructure/MicroCluster.cpp +++ b/src/Algorithm/DataStructure/MicroCluster.cpp @@ -27,16 +27,19 @@ SESAME::MicroCluster::MicroCluster(int dimension, int id,PointPtr dataPoint,doub this->id.push_back(id); LST=0; SST=0; - this->visited=true; + this->visited=false; this->createTime=clock(); this->lastUpdateTime=this->createTime; this->radius=radius; - for (int i = 0; i < dimension; i++) { + + for (int i = 0; i < this->dimension; i++) + { double data = dataPoint->getFeatureItem(i); LS.push_back(data); centroid.push_back(data); } + } //Release memory of the current micro cluster diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3deaa2e6..edc6fca2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,9 +1,10 @@ # adding the Google_Tests_run target add_executable(Google_Tests_run - SystemTest/CluStreamTest.cpp - SystemTest/StreamKMTest.cpp - SystemTest/BirchTest.cpp - SystemTest/DenStreamTest.cpp + #SystemTest/CluStreamTest.cpp + #SystemTest/StreamKMTest.cpp + #SystemTest/BirchTest.cpp + #SystemTest/DenStreamTest.cpp + SystemTest/DBStreamTest.cpp ) # linking Google_Tests_run with sesame_lib which will be tested diff --git a/test/SystemTest/DBStreamTest.cpp b/test/SystemTest/DBStreamTest.cpp new file mode 100644 index 00000000..6d9a099a --- /dev/null +++ b/test/SystemTest/DBStreamTest.cpp @@ -0,0 +1,49 @@ +// Copyright (C) 2021 by the IntelliStream team (https://github.com/intellistream) + +// +// Created by Zhenyu on 2021/9/18. +// + + +#include +#include +#include +#include +#include +#include +#include + +TEST(SystemTest, DBStreamTest) { + //Setup Logs. + setupLogging("benchmark.log", LOG_DEBUG); + //Parse parameters. + param_t cmd_params; + cmd_params.pointNumber = 15120; + cmd_params.dimension = 54; + cmd_params.base=2; + cmd_params.lambda= 0.125; + cmd_params.radius= 1500; + cmd_params.cleanUpInterval=30; + cmd_params.weightMin=2; + cmd_params.alpha=0.3; + cmd_params.inputPath = std::filesystem::current_path().generic_string() + "/datasets/CoverType.txt"; + cmd_params.outputPath = "results.txt"; + cmd_params.algoType = SESAME::DBStreamType; + + std::vector input; + std::vector results; + + //Create Spout. + SESAME::DataSourcePtr sourcePtr = SESAME::DataSourceFactory::create(); + //Directly load data from file. TODO: configure it to load from external sensors, e.g., HTTP. + BenchmarkUtils::loadData(cmd_params, sourcePtr); + + //Create Sink. + SESAME::DataSinkPtr sinkPtr = SESAME::DataSinkFactory::create(); + + //Create Algorithm. + SESAME::AlgorithmPtr algoPtr = SESAME::AlgorithmFactory::create(cmd_params); + + //Run algorithm producing results. + BenchmarkUtils::runBenchmark(cmd_params, sourcePtr, sinkPtr, algoPtr); +} \ No newline at end of file From 681e950f6d21ff9cd5adcd4b90ab71f32088215d Mon Sep 17 00:00:00 2001 From: s1926539 Date: Sun, 19 Sep 2021 11:28:07 +0800 Subject: [PATCH 16/21] all test passed but still have bugs in Unordered_map using customed defined class --- include/Algorithm/DBStream.hpp | 1 + .../DataStructure/WeightedAdjacencyList.hpp | 14 ++++++-- src/Algorithm/DBStream.cpp | 32 ++++++++++--------- .../DataStructure/WeightedAdjacencyList.cpp | 7 ++-- 4 files changed, 33 insertions(+), 21 deletions(-) diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp index 0464e5af..c596bc0a 100644 --- a/include/Algorithm/DBStream.hpp +++ b/include/Algorithm/DBStream.hpp @@ -60,6 +60,7 @@ class DBStream : public Algorithm static void insertIntoGraph(unordered_map> connectivityGraph, MicroClusterPtr microCluster); void findConnectedComponents(unordered_map> connectivityGraph); + }; } diff --git a/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp b/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp index e7566e37..7c079ebf 100644 --- a/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp +++ b/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -20,7 +21,14 @@ class MicroClusterPair{ MicroClusterPtr microCluster1; MicroClusterPtr microCluster2; MicroClusterPair( MicroClusterPtr microCluster1,MicroClusterPtr microCluster2); - bool equal(MicroClusterPairPtr other); + bool operator==(const MicroClusterPair &other) const; +}; + +struct hashMicroClusterPair{ + size_t operator()(const MicroClusterPair µClusterPair) const + { + return (std::hash()(microClusterPair.microCluster1)) + (std::hash()(microClusterPair.microCluster2)); + } }; class AdjustedWeight; @@ -34,8 +42,8 @@ class AdjustedWeight{ double getCurrentWeight(double decayFactor); }; -typedef std::unordered_map WeightedAdjacencyList; -typedef std::pair DensityGraph; +typedef std::unordered_map WeightedAdjacencyList; +typedef std::pair DensityGraph; //S in paper, represent Weighted Adjacency List } diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp index 39924944..77c70d11 100644 --- a/src/Algorithm/DBStream.cpp +++ b/src/Algorithm/DBStream.cpp @@ -86,7 +86,7 @@ void SESAME::DBStream::update(PointPtr dataPoint){ this->pointArrivingTime=clock(); this->microClusterNN=findFixedRadiusNN(dataPoint); std::vector::size_type sizeNN=microClusterNN.size(); - SESAME_INFO("find suitable MCs number : "<insert(dataPoint,decayFactor); // just update weight // update shared density for (int j = i + 1; j < sizeNN; j++) { - MicroClusterPairPtr microClusterPair =SESAME::DataStructureFactory::createMicroClusterPair(microCluster, - microClusterNN.at(j)); - MicroClusterPairPtr microClusterPair2 =SESAME::DataStructureFactory::createMicroClusterPair(microClusterNN.at(j),microCluster); - SESAME_INFO("pair "<< microClusterNN.at(i)->id.front()<<" "<< microClusterNN.at(j)->id.front()); - if (weightedAdjacencyList.find(microClusterPair) != weightedAdjacencyList.end()|| - weightedAdjacencyList.find(microClusterPair2) != weightedAdjacencyList.end()) + + + // SESAME_INFO("pair "<< microClusterNN.at(i)->id.front()<<" "<< microClusterNN.at(j)->id.front()); + if (weightedAdjacencyList.find( MicroClusterPair(microCluster, + microClusterNN.at(j))) != weightedAdjacencyList.end()) { SESAME_INFO("find microClusterPair!"<id.front()<<", "<id.front()); - clock_t startT= weightedAdjacencyList[microClusterPair]->updateTime; + clock_t startT= weightedAdjacencyList[ MicroClusterPair(microCluster, + microClusterNN.at(j))]->updateTime; double decayValue = dampedWindow->decayFunction(startT,this->pointArrivingTime); - weightedAdjacencyList[microClusterPair]->add(this->pointArrivingTime,decayValue); + weightedAdjacencyList[ MicroClusterPair(microCluster, + microClusterNN.at(j))]->add(this->pointArrivingTime,decayValue); //SESAME_INFO("pair weight = "<weight ); } else { - //SESAME_INFO("Create microClusterPair!"); + //SESAME_INFO("Create microClusterPair!" << microClusterNN.at(i)->id.front()<<", "<id.front()); AdjustedWeightPtr adjustedWeight = SESAME::DataStructureFactory::createAdjustedWeight(1,this->pointArrivingTime); - DensityGraph densityGraph(microClusterPair,adjustedWeight); + DensityGraph densityGraph( MicroClusterPair(microCluster, + microClusterNN.at(j)),adjustedWeight); weightedAdjacencyList.insert(densityGraph); // SESAME_INFO("pair weight = "<weight ); } @@ -194,8 +196,8 @@ void SESAME::DBStream::cleanUp(clock_t nowTime){ { for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++) { - if (std::find(removeMicroCluster.begin(),removeMicroCluster.end(),interW->first->microCluster1) !=removeMicroCluster.end() - || std::find(removeMicroCluster.begin(),removeMicroCluster.end(),interW->first->microCluster2)!=removeMicroCluster.end()) + if (std::find(removeMicroCluster.begin(),removeMicroCluster.end(),interW->first.microCluster1) !=removeMicroCluster.end() + || std::find(removeMicroCluster.begin(),removeMicroCluster.end(),interW->first.microCluster2)!=removeMicroCluster.end()) { weightedAdjacencyList.erase(interW); SESAME_INFO("CLEAN existing entries.");} else{ @@ -214,8 +216,8 @@ void SESAME::DBStream::reCluster(double threshold){ unordered_map> connectivityGraph;//Connectivity graph C in DBStream paper WeightedAdjacencyList::iterator interW; for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++){ - MicroClusterPtr microCluster1 =interW->first->microCluster1->copy(); - MicroClusterPtr microCluster2 =interW->first->microCluster2->copy(); + MicroClusterPtr microCluster1 =interW->first.microCluster1->copy(); + MicroClusterPtr microCluster2 =interW->first.microCluster2->copy(); if (microCluster1->weight >= dbStreamParams.weightMin &µCluster2->weight >= dbStreamParams.weightMin){ double val = 2*interW->second->weight / (microCluster1->weight+microCluster2->weight); if (val > threshold) { diff --git a/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp index a5cdadbd..db662119 100644 --- a/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp +++ b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp @@ -7,12 +7,13 @@ SESAME::MicroClusterPair::MicroClusterPair( MicroClusterPtr microCluster1,MicroC this->microCluster2=microCluster2->copy(); } -bool SESAME::MicroClusterPair::equal(MicroClusterPairPtr other){ +bool SESAME::MicroClusterPair::operator==(const MicroClusterPair &other) const{ bool equal=false; - if(other->microCluster1==this->microCluster1&&other->microCluster2==this->microCluster2) + if(other.microCluster1==this->microCluster1&&other.microCluster2==this->microCluster2) equal=true; - if(other->microCluster1==this->microCluster2&&other->microCluster2==this->microCluster1) + if(other.microCluster1==this->microCluster2&&other.microCluster2==this->microCluster1) equal=true; + printf("Judging ...",equal); return equal; } SESAME::AdjustedWeight::AdjustedWeight(double weight, clock_t pointTime){ From 4e485342867f414db183856dd98826ff677d13f3 Mon Sep 17 00:00:00 2001 From: s1926539 Date: Mon, 20 Sep 2021 01:24:11 +0800 Subject: [PATCH 17/21] all test passed but still have bugs in Unordered_map using customed defined class --- include/Algorithm/DBStream.hpp | 2 +- .../Algorithm/DataStructure/MicroCluster.hpp | 13 ++- .../DataStructure/WeightedAdjacencyList.hpp | 40 ++++++--- src/Algorithm/DBStream.cpp | 86 ++++++++++--------- src/Algorithm/DataStructure/MicroCluster.cpp | 3 +- .../DataStructure/WeightedAdjacencyList.cpp | 15 +--- test/SystemTest/DBStreamTest.cpp | 8 +- 7 files changed, 94 insertions(+), 73 deletions(-) diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp index c596bc0a..1e342305 100644 --- a/include/Algorithm/DBStream.hpp +++ b/include/Algorithm/DBStream.hpp @@ -34,7 +34,7 @@ class DBStream : public Algorithm std::vector microClusters; SESAME::WeightedAdjacencyList weightedAdjacencyList; std::vector microClusterNN;//micro clusters found in function findFixedRadiusNN - double weakEntry;//W_weak, weak entries + int weakEntry;//W_weak, weak entries double aWeakEntry; clock_t startTime; clock_t pointArrivingTime; diff --git a/include/Algorithm/DataStructure/MicroCluster.hpp b/include/Algorithm/DataStructure/MicroCluster.hpp index 6e6e8160..fb5e2812 100644 --- a/include/Algorithm/DataStructure/MicroCluster.hpp +++ b/include/Algorithm/DataStructure/MicroCluster.hpp @@ -33,8 +33,6 @@ class MicroCluster { double weight; //number of data point in the clusters int dimension; double radius;//Used in DBStream - - //the parameters below is unique for DenStream clock_t createTime; clock_t lastUpdateTime; @@ -46,6 +44,7 @@ class MicroCluster { MicroCluster(int dimension, int id); MicroCluster(int dimension, int id,PointPtr dataPoint,double radius);//DBStream + ~MicroCluster(); void init(PointPtr datapoint, int timestamp); void insert(PointPtr datapoint, int timestamp);//Used in CluStream @@ -79,5 +78,15 @@ class MicroCluster { double distance; static double inverseError(double x); }; +typedef struct finderMicroCluster +{ + finderMicroCluster(int n) : id(n) { } + bool operator()(MicroClusterPtr MC) + { + return (id == MC->id.front()); + } + int id; +}finderMicroCluster; + } #endif //SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_MICROCLUSTER_HPP_ diff --git a/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp b/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp index 7c079ebf..d4540f43 100644 --- a/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp +++ b/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp @@ -14,23 +14,41 @@ #include #include namespace SESAME { -class MicroClusterPair; +struct MicroClusterPair; typedef std::shared_ptr MicroClusterPairPtr; -class MicroClusterPair{ - public: +struct MicroClusterPair{ MicroClusterPtr microCluster1; MicroClusterPtr microCluster2; - MicroClusterPair( MicroClusterPtr microCluster1,MicroClusterPtr microCluster2); - bool operator==(const MicroClusterPair &other) const; + MicroClusterPair( MicroClusterPtr microCluster1,MicroClusterPtr microCluster2){ + this->microCluster1=microCluster1->copy(); + this->microCluster2=microCluster2->copy(); + } + //bool operator==(const MicroClusterPair &other) const; + }; -struct hashMicroClusterPair{ - size_t operator()(const MicroClusterPair µClusterPair) const +struct KeyHasher{ + std::size_t operator()(const MicroClusterPair µClusterPair) const { - return (std::hash()(microClusterPair.microCluster1)) + (std::hash()(microClusterPair.microCluster2)); + return (std::hash()(microClusterPair.microCluster1->id.front())) ^ (std::hash()(microClusterPair.microCluster2->id.front())); } }; +struct EqualKey + { + bool operator() (const MicroClusterPair &MCPair1, const MicroClusterPair &MCPair2) const + { + bool equal=false; + if( MCPair1.microCluster1->id.front()==MCPair2.microCluster1->id.front() &&MCPair1.microCluster2->id.front()==MCPair2.microCluster2->id.front() ) + equal=true; + if(MCPair1.microCluster1->id.front() ==MCPair2.microCluster2->id.front() &&MCPair1.microCluster2->id.front()==MCPair2.microCluster1->id.front() ) + equal=true; + + return equal; + } + }; + + class AdjustedWeight; typedef std::shared_ptr AdjustedWeightPtr; class AdjustedWeight{ @@ -42,11 +60,7 @@ class AdjustedWeight{ double getCurrentWeight(double decayFactor); }; -typedef std::unordered_map WeightedAdjacencyList; +typedef std::unordered_map WeightedAdjacencyList; typedef std::pair DensityGraph; -//S in paper, represent Weighted Adjacency List - } - - #endif //SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_WEIGHTEDADJACENCYLIST_HPP_ diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp index 77c70d11..63d856ab 100644 --- a/src/Algorithm/DBStream.cpp +++ b/src/Algorithm/DBStream.cpp @@ -39,8 +39,8 @@ void SESAME::DBStream::Initilize() { this->startTime = clock(); this->pointArrivingTime= clock(); this->lastCleanTime=clock(); - this->weakEntry= pow(dbStreamParams.base,(-1)*dbStreamParams.lambda*dbStreamParams.cleanUpInterval); - this->aWeakEntry=weakEntry*dbStreamParams.alpha; + this->weakEntry= ceil(pow(dbStreamParams.base,(-1)*dbStreamParams.lambda*dbStreamParams.cleanUpInterval)); + this->aWeakEntry=ceil(weakEntry*dbStreamParams.alpha); this->microClusterIndex=-1; } /** @@ -100,32 +100,28 @@ void SESAME::DBStream::update(PointPtr dataPoint){ else { for (int i = 0; i < sizeNN; i++) { //SESAME_INFO("insert into existing MCs! id "<< microClusterNN.at(i)->id.front()); - MicroClusterPtr microCluster = microClusterNN.at(i); + MicroClusterPtr microCluster = microClusterNN.at(i)->copy(); microCluster->insert(dataPoint,decayFactor); // just update weight // update shared density for (int j = i + 1; j < sizeNN; j++) { - - + MicroClusterPair microClusterPair(microCluster->copy(), microClusterNN.at(j)->copy()); // SESAME_INFO("pair "<< microClusterNN.at(i)->id.front()<<" "<< microClusterNN.at(j)->id.front()); - if (weightedAdjacencyList.find( MicroClusterPair(microCluster, - microClusterNN.at(j))) != weightedAdjacencyList.end()) + if (weightedAdjacencyList.find(microClusterPair) != weightedAdjacencyList.end()) { - SESAME_INFO("find microClusterPair!"<id.front()<<", "<id.front()); - clock_t startT= weightedAdjacencyList[ MicroClusterPair(microCluster, - microClusterNN.at(j))]->updateTime; + // SESAME_INFO("find microClusterPair!"<id.front()<<", "<id.front()); + clock_t startT= weightedAdjacencyList[microClusterPair]->updateTime; double decayValue = dampedWindow->decayFunction(startT,this->pointArrivingTime); - weightedAdjacencyList[ MicroClusterPair(microCluster, - microClusterNN.at(j))]->add(this->pointArrivingTime,decayValue); - //SESAME_INFO("pair weight = "<weight ); - } - else - { - //SESAME_INFO("Create microClusterPair!" << microClusterNN.at(i)->id.front()<<", "<id.front()); + weightedAdjacencyList[microClusterPair]->add(this->pointArrivingTime,decayValue); + // SESAME_INFO("pair weight = "<weight ); + // SESAME_INFO(" weight is "<weight ); + }else{ + // SESAME_INFO("Create microClusterPair!" << microClusterNN.at(i)->id.front()<<", "<id.front()); AdjustedWeightPtr adjustedWeight = SESAME::DataStructureFactory::createAdjustedWeight(1,this->pointArrivingTime); - DensityGraph densityGraph( MicroClusterPair(microCluster, - microClusterNN.at(j)),adjustedWeight); + DensityGraph densityGraph( microClusterPair ,adjustedWeight); weightedAdjacencyList.insert(densityGraph); - // SESAME_INFO("pair weight = "<weight ); + + // SESAME_INFO("new one weight is "<weight ); + // SESAME_INFO("size is"<lastCleanTime)/CLOCKS_PER_SEC)>= dbStreamParams.cleanUpInterval && dataPoint->getIndex()!=0) { - cleanUp(this->pointArrivingTime); this->lastCleanTime=this->pointArrivingTime; + SESAME_INFO("Clean !"); } + } @@ -181,35 +178,46 @@ void SESAME::DBStream::cleanUp(clock_t nowTime){ std::vector removeMicroCluster; std::vector::size_type iter; //Check the current micro Clusters whether they have weak MCs + //This just test for remove id + std::vector idList; for (iter=0;iterweight <= this->weakEntry) { removeMicroCluster.push_back(microClusters.at(iter)->copy()); + idList.push_back(microClusters.at(iter)->id.front()); microClusters.erase(microClusters.begin()+int(iter));//Delete this MC from current MC list } } - SESAME_INFO("CLEAN MCs Already."); - WeightedAdjacencyList::iterator interW; - //Check the current shared density graph whether they have weak entries - if(!weightedAdjacencyList.empty()) + //SESAME_INFO("now rm MCs number is "<(re, " ")); + SESAME_INFO("RM list "<first.microCluster1) !=removeMicroCluster.end() - || std::find(removeMicroCluster.begin(),removeMicroCluster.end(),interW->first.microCluster2)!=removeMicroCluster.end()) - { - weightedAdjacencyList.erase(interW); SESAME_INFO("CLEAN existing entries.");} - else{ - - double decayFactor=dampedWindow->decayFunction(interW->second->updateTime,nowTime); - if (interW->second->getCurrentWeight(decayFactor) < aWeakEntry) - weightedAdjacencyList.erase(interW); - } - } + nowTime; + auto exist1 = std::find_if(removeMicroCluster.begin(), removeMicroCluster.end(),SESAME::finderMicroCluster(iterW->first.microCluster1->id.front())); + auto exist2 = std::find_if(removeMicroCluster.begin(), removeMicroCluster.end(),SESAME::finderMicroCluster(iterW->first.microCluster2->id.front())); + if ( exist1!=removeMicroCluster.end()|| exist2!=removeMicroCluster.end()) + { + // SESAME_INFO("weightedAdjacencyList size:"<decayFunction(iterW->second->updateTime,nowTime); + SESAME_INFO("Check existing entries... "<<" "<second->getCurrentWeight(decayFactor)); + if (iterW->second->getCurrentWeight(decayFactor) < aWeakEntry) + { + SESAME_INFO("erase... "<<" "<first); + //SESAME_INFO("pair is "<first.microCluster1->id.front()<<" "<first.microCluster2->id.front()); + } + } } - } void SESAME::DBStream::reCluster(double threshold){ diff --git a/src/Algorithm/DataStructure/MicroCluster.cpp b/src/Algorithm/DataStructure/MicroCluster.cpp index 6f7a436c..1de19268 100644 --- a/src/Algorithm/DataStructure/MicroCluster.cpp +++ b/src/Algorithm/DataStructure/MicroCluster.cpp @@ -348,4 +348,5 @@ double SESAME::MicroCluster::inverseError(double x){ SESAME::MicroClusterPtr SESAME::MicroCluster::copy() { return std::make_shared(*this); -} \ No newline at end of file +} + diff --git a/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp index db662119..1b93458f 100644 --- a/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp +++ b/src/Algorithm/DataStructure/WeightedAdjacencyList.cpp @@ -2,20 +2,9 @@ // Created by 1124a on 2021/8/30. // #include -SESAME::MicroClusterPair::MicroClusterPair( MicroClusterPtr microCluster1,MicroClusterPtr microCluster2){ - this->microCluster1=microCluster1->copy(); - this->microCluster2=microCluster2->copy(); -} -bool SESAME::MicroClusterPair::operator==(const MicroClusterPair &other) const{ - bool equal=false; - if(other.microCluster1==this->microCluster1&&other.microCluster2==this->microCluster2) - equal=true; - if(other.microCluster1==this->microCluster2&&other.microCluster2==this->microCluster1) - equal=true; - printf("Judging ...",equal); - return equal; -} + + SESAME::AdjustedWeight::AdjustedWeight(double weight, clock_t pointTime){ this->weight=weight; this->updateTime=pointTime; diff --git a/test/SystemTest/DBStreamTest.cpp b/test/SystemTest/DBStreamTest.cpp index 6d9a099a..fa319440 100644 --- a/test/SystemTest/DBStreamTest.cpp +++ b/test/SystemTest/DBStreamTest.cpp @@ -21,10 +21,10 @@ TEST(SystemTest, DBStreamTest) { cmd_params.pointNumber = 15120; cmd_params.dimension = 54; cmd_params.base=2; - cmd_params.lambda= 0.125; - cmd_params.radius= 1500; - cmd_params.cleanUpInterval=30; - cmd_params.weightMin=2; + cmd_params.lambda= 0.00001; + cmd_params.radius= 500; + cmd_params.cleanUpInterval=3; + cmd_params.weightMin=3; cmd_params.alpha=0.3; cmd_params.inputPath = std::filesystem::current_path().generic_string() + "/datasets/CoverType.txt"; cmd_params.outputPath = "results.txt"; From 0370f593cb16da671af85f3095cca843ef539b47 Mon Sep 17 00:00:00 2001 From: s1926539 Date: Mon, 20 Sep 2021 01:25:36 +0800 Subject: [PATCH 18/21] Only need to fix bug in clean up functions when erase ... --- src/Algorithm/DBStream.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp index 63d856ab..e7289236 100644 --- a/src/Algorithm/DBStream.cpp +++ b/src/Algorithm/DBStream.cpp @@ -202,20 +202,20 @@ void SESAME::DBStream::cleanUp(clock_t nowTime){ if ( exist1!=removeMicroCluster.end()|| exist2!=removeMicroCluster.end()) { // SESAME_INFO("weightedAdjacencyList size:"<decayFunction(iterW->second->updateTime,nowTime); - SESAME_INFO("Check existing entries... "<<" "<second->getCurrentWeight(decayFactor)); + // SESAME_INFO("Check existing entries... "<<" "<second->getCurrentWeight(decayFactor)); if (iterW->second->getCurrentWeight(decayFactor) < aWeakEntry) - { - SESAME_INFO("erase... "<<" "<first); + + // SESAME_INFO("erase... "<<" "<first); //SESAME_INFO("pair is "<first.microCluster1->id.front()<<" "<first.microCluster2->id.front()); - } + } } } From 9233b340fcd760806c2a6cd20e65d3b71abf43c3 Mon Sep 17 00:00:00 2001 From: s1926539 Date: Tue, 21 Sep 2021 00:16:07 +0800 Subject: [PATCH 19/21] all bugs fixed, but still have some in unordered_map --- include/Algorithm/DBStream.hpp | 11 +- .../Algorithm/DataStructure/MicroCluster.hpp | 2 +- src/Algorithm/DBStream.cpp | 189 +++++++++--------- test/SystemTest/DBStreamTest.cpp | 10 +- 4 files changed, 111 insertions(+), 101 deletions(-) diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp index 1e342305..13a09939 100644 --- a/include/Algorithm/DBStream.hpp +++ b/include/Algorithm/DBStream.hpp @@ -42,6 +42,9 @@ class DBStream : public Algorithm int microClusterIndex; //Final output of clusters Clusters finalClusters; + //Connectivity graph + unordered_map> connecvtivityGraphId; + //TODO Need to implement weighted a weighted adjacency list S DBStream(param_t &cmd_params); ~DBStream(); @@ -55,11 +58,9 @@ class DBStream : public Algorithm std::vector findFixedRadiusNN(PointPtr dataPoint); void cleanUp(clock_t nowTime); void reCluster(double threshold); - static void insertIntoGraph(unordered_map> connectivityGraph, - MicroClusterPtr microCluster,MicroClusterPtr Other); - static void insertIntoGraph(unordered_map> connectivityGraph, - MicroClusterPtr microCluster); - void findConnectedComponents(unordered_map> connectivityGraph); + void insertIntoGraph(int microClusterId,int OtherId); + void insertIntoGraph(int microClusterId); + void findConnectedComponents(); }; diff --git a/include/Algorithm/DataStructure/MicroCluster.hpp b/include/Algorithm/DataStructure/MicroCluster.hpp index fb5e2812..2db4bb81 100644 --- a/include/Algorithm/DataStructure/MicroCluster.hpp +++ b/include/Algorithm/DataStructure/MicroCluster.hpp @@ -81,7 +81,7 @@ class MicroCluster { typedef struct finderMicroCluster { finderMicroCluster(int n) : id(n) { } - bool operator()(MicroClusterPtr MC) + bool operator()(const MicroClusterPtr MC) const { return (id == MC->id.front()); } diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp index e7289236..f7e34932 100644 --- a/src/Algorithm/DBStream.cpp +++ b/src/Algorithm/DBStream.cpp @@ -53,7 +53,6 @@ void SESAME::DBStream::Initilize() { SESAME_INFO("Start initialize..."); Initilize(); this->isInitial = true; - } else { @@ -62,11 +61,7 @@ void SESAME::DBStream::Initilize() { update(input); } } -void SESAME::DBStream::runOfflineClustering(DataSinkPtr sinkPtr) { - SESAME_INFO("Start offline..."); - sinkPtr; - reCluster(dbStreamParams.alpha); -} + /** * @Description: Insert data point into existing MCs, @@ -89,10 +84,8 @@ void SESAME::DBStream::update(PointPtr dataPoint){ // SESAME_INFO("find suitable MCs number : "<id.front()); - MicroClusterPtr microCluster = microClusterNN.at(i)->copy(); - microCluster->insert(dataPoint,decayFactor); // just update weight - // update shared density + // MicroClusterPtr microCluster = microClusterNN.at(i); + // SESAME_INFO(" decay F is "<insert(dataPoint,decayFactor); // just update weight for (int j = i + 1; j < sizeNN; j++) { - MicroClusterPair microClusterPair(microCluster->copy(), microClusterNN.at(j)->copy()); - // SESAME_INFO("pair "<< microClusterNN.at(i)->id.front()<<" "<< microClusterNN.at(j)->id.front()); + MicroClusterPair microClusterPair(microClusterNN[i], microClusterNN.at(j)); if (weightedAdjacencyList.find(microClusterPair) != weightedAdjacencyList.end()) { - // SESAME_INFO("find microClusterPair!"<id.front()<<", "<id.front()); clock_t startT= weightedAdjacencyList[microClusterPair]->updateTime; double decayValue = dampedWindow->decayFunction(startT,this->pointArrivingTime); weightedAdjacencyList[microClusterPair]->add(this->pointArrivingTime,decayValue); - // SESAME_INFO("pair weight = "<weight ); // SESAME_INFO(" weight is "<weight ); - }else{ + } else{ // SESAME_INFO("Create microClusterPair!" << microClusterNN.at(i)->id.front()<<", "<id.front()); AdjustedWeightPtr adjustedWeight = SESAME::DataStructureFactory::createAdjustedWeight(1,this->pointArrivingTime); DensityGraph densityGraph( microClusterPair ,adjustedWeight); weightedAdjacencyList.insert(densityGraph); - // SESAME_INFO("new one weight is "<weight ); // SESAME_INFO("size is"<pointArrivingTime); this->lastCleanTime=this->pointArrivingTime; - SESAME_INFO("Clean !"); } } @@ -173,7 +160,6 @@ bool SESAME::DBStream::checkMove( std::vector microClustersList return move; } - void SESAME::DBStream::cleanUp(clock_t nowTime){ std::vector removeMicroCluster; std::vector::size_type iter; @@ -189,58 +175,81 @@ void SESAME::DBStream::cleanUp(clock_t nowTime){ microClusters.erase(microClusters.begin()+int(iter));//Delete this MC from current MC list } } - //SESAME_INFO("now rm MCs number is "<(re, " ")); - SESAME_INFO("RM list "<first.microCluster1->id.front())); auto exist2 = std::find_if(removeMicroCluster.begin(), removeMicroCluster.end(),SESAME::finderMicroCluster(iterW->first.microCluster2->id.front())); - if ( exist1!=removeMicroCluster.end()|| exist2!=removeMicroCluster.end()) - { - // SESAME_INFO("weightedAdjacencyList size:"<decayFunction(iterW->second->updateTime,nowTime); - // SESAME_INFO("Check existing entries... "<<" "<second->getCurrentWeight(decayFactor)); if (iterW->second->getCurrentWeight(decayFactor) < aWeakEntry) + iterW=weightedAdjacencyList.erase(iterW); + else + iterW++; + } + } + SESAME_INFO("CLEAN! now weightedAdjacencyList size:"<first); - //SESAME_INFO("pair is "<first.microCluster1->id.front()<<" "<first.microCluster2->id.front()); - } +void SESAME::DBStream::runOfflineClustering(DataSinkPtr sinkPtr) { + reCluster(dbStreamParams.alpha); + for(auto iter=0; iter!=finalClusters.size();iter++) + { //initialize pseudo point of macro clusters + PointPtr point = DataStructureFactory::createPoint(iter, 0, finalClusters.at(iter).front()->dimension, 0); + //This is just for testing, need to delete + std::vector centroid(finalClusters.at(iter).front()->dimension,0); + for(auto j=0; j!=finalClusters.at(iter).size();j++) + { + // SESAME_INFO(finalClusters.at(iter)[j]->id.front()<<" MC weight is "<weight); + double currentWeight=point->getWeight()+finalClusters.at(iter).at(j)->weight; + point->setWeight(currentWeight); + for(auto a =0;adimension;a++) + { + if(j==0) + point->setFeatureItem(0,a); + point->setFeatureItem(point->getFeatureItem(a)+finalClusters.at(iter).at(j)->centroid.at(a),a); + centroid[a]=point->getFeatureItem(a);//testing + if(j==finalClusters.at(iter).size()-1) + { + point->setFeatureItem(point->getFeatureItem(a)/finalClusters.at(iter).at(j)->dimension,a); + centroid[a] =centroid[a]/finalClusters.at(iter).at(j)->dimension;//testing + } + } + } + SESAME_INFO("The NO."<getWeight()); + std::stringstream re; + std::copy(centroid.begin(),centroid.end(),std::ostream_iterator(re, " ")); + // SESAME_INFO("The NO."<put(point->copy()); // point index start from 0 } } + void SESAME::DBStream::reCluster(double threshold){ - unordered_map> connectivityGraph;//Connectivity graph C in DBStream paper - WeightedAdjacencyList::iterator interW; - for (interW = weightedAdjacencyList.begin(); interW != weightedAdjacencyList.end(); interW++){ - MicroClusterPtr microCluster1 =interW->first.microCluster1->copy(); - MicroClusterPtr microCluster2 =interW->first.microCluster2->copy(); - if (microCluster1->weight >= dbStreamParams.weightMin &µCluster2->weight >= dbStreamParams.weightMin){ - double val = 2*interW->second->weight / (microCluster1->weight+microCluster2->weight); + WeightedAdjacencyList::iterator iterW; + for (iterW = weightedAdjacencyList.begin(); iterW != weightedAdjacencyList.end(); iterW++){ + if (iterW->first.microCluster1->weight >= dbStreamParams.weightMin &&iterW->first.microCluster2->weight >= dbStreamParams.weightMin){ + double val = 2*iterW->second->weight / (iterW->first.microCluster1->weight+iterW->first.microCluster2->weight); if (val > threshold) { - insertIntoGraph(connectivityGraph,microCluster1,microCluster2); - insertIntoGraph(connectivityGraph,microCluster2,microCluster1); + insertIntoGraph( iterW->first.microCluster1->id.front(),iterW->first.microCluster2->id.front()); + insertIntoGraph(iterW->first.microCluster2->id.front(), iterW->first.microCluster1->id.front()); } else { - insertIntoGraph(connectivityGraph,microCluster1); - insertIntoGraph(connectivityGraph,microCluster2); + insertIntoGraph(iterW->first.microCluster1->id.front()); + insertIntoGraph(iterW->first.microCluster2->id.front()); } } } - findConnectedComponents(connectivityGraph); - + findConnectedComponents(); } /** * @Description: insert vertices and entries into connectivity graph when micro cluster pair @@ -251,26 +260,28 @@ void SESAME::DBStream::reCluster(double threshold){ * @Return: void */ -void SESAME::DBStream::insertIntoGraph(unordered_map> connectivityGraph, - MicroClusterPtr microCluster,MicroClusterPtr Other){ - if (connectivityGraph.find(microCluster)!=connectivityGraph.end()) - connectivityGraph.find(microCluster)->second.push_back(Other); - else{ - microCluster->visited = false; - std::vector newMicroClusterSet; - newMicroClusterSet.push_back(Other); - connectivityGraph.insert(std::make_pair(microCluster,newMicroClusterSet)); +void SESAME::DBStream::insertIntoGraph(int microClusterId,int OtherId){ + if (connecvtivityGraphId.find(microClusterId)!=connecvtivityGraphId.end()) + { + if(std::find(connecvtivityGraphId[microClusterId].begin(),connecvtivityGraphId[microClusterId].end(),OtherId)==connecvtivityGraphId[microClusterId].end()) + connecvtivityGraphId[microClusterId].push_back(OtherId); + } else{ + auto microCluster = std::find_if(microClusters.begin(), microClusters.end(),SESAME::finderMicroCluster(microClusterId)); + (*microCluster)->visited=false; + std::vector newMicroClusterIdSet; + newMicroClusterIdSet.push_back(OtherId); + connecvtivityGraphId.insert(make_pair(microClusterId,OtherId)); + //SESAME_INFO("Key cluster size: "<> connectivityGraph, - MicroClusterPtr microCluster){ - if (connectivityGraph.find(microCluster)==connectivityGraph.end()) +void SESAME::DBStream::insertIntoGraph(int microClusterId){ + if (connecvtivityGraphId.find(microClusterId)==connecvtivityGraphId.end()) { - microCluster->visited = false; - std::vector newMicroClusterSet; - connectivityGraph.insert(std::make_pair(microCluster,newMicroClusterSet)); + auto microCluster = std::find_if(microClusters.begin(), microClusters.end(),SESAME::finderMicroCluster(microClusterId)); + (*microCluster)->visited=false; + std::vector newMicroClusterIdSet; + connecvtivityGraphId.insert(make_pair(microClusterId,newMicroClusterIdSet)); } } @@ -282,35 +293,33 @@ void SESAME::DBStream::insertIntoGraph(unordered_map> connectivityGraph){ - unordered_map>::iterator iter; +void SESAME::DBStream::findConnectedComponents(){ + unordered_map>::iterator iter; //This variable just for indicating the id of micro cluster which forming macro clusters - std::vector idList; - for (iter = connectivityGraph.begin(); iter != connectivityGraph.end(); iter++){ - if (!iter->first->visited) { - std::vector newCluster, clusterGroup; - newCluster.push_back(iter->first); - for(const auto & iterS : iter->second) - { - if (!iterS->visited) - newCluster.push_back(iterS); - } - SESAME_INFO("New formed macro cluster ... including micro cluster :"); - while (!newCluster.empty()) + for (iter = connecvtivityGraphId.begin(); iter != connecvtivityGraphId.end(); iter++){ + std::vector idList; + auto microClusterKey = std::find_if(microClusters.begin(), microClusters.end(),SESAME::finderMicroCluster(iter->first)); + if (!(*microClusterKey)->visited) { + std::vector newCluster; + newCluster.push_back((*microClusterKey)); + idList.push_back(iter->first); + for(int iterValue : iter->second) { - //after found the front one, insert it into clusterGroup and delete from the original vector - MicroClusterPtr microCluster = newCluster.front(); - newCluster.erase(newCluster.begin()); - clusterGroup.push_back(microCluster); - idList.push_back(microCluster->id.front()); - microCluster->visited = true; + auto microClusterElement = std::find_if(microClusters.begin(), microClusters.end(),SESAME::finderMicroCluster(iterValue)); + if (!(*microClusterElement)->visited) + { + newCluster.push_back((*microClusterElement)); + (*microClusterElement)->visited = true; + idList.push_back((*microClusterElement)->id.front()); + } } - this->finalClusters.push_back(clusterGroup); + this->finalClusters.push_back(newCluster); //just used for examine reform ,need to delete later std::stringstream result; std::copy(idList.begin(),idList.end(),std::ostream_iterator(result, " ")); + SESAME_INFO("New formed macro cluster ... including micro cluster :"); SESAME_INFO(" " << result.str() ); } + } } diff --git a/test/SystemTest/DBStreamTest.cpp b/test/SystemTest/DBStreamTest.cpp index fa319440..8145de58 100644 --- a/test/SystemTest/DBStreamTest.cpp +++ b/test/SystemTest/DBStreamTest.cpp @@ -18,14 +18,14 @@ TEST(SystemTest, DBStreamTest) { setupLogging("benchmark.log", LOG_DEBUG); //Parse parameters. param_t cmd_params; - cmd_params.pointNumber = 15120; + cmd_params.pointNumber = 1000; cmd_params.dimension = 54; cmd_params.base=2; - cmd_params.lambda= 0.00001; - cmd_params.radius= 500; - cmd_params.cleanUpInterval=3; + cmd_params.lambda= 0.001; + cmd_params.radius= 1200; + cmd_params.cleanUpInterval=12; cmd_params.weightMin=3; - cmd_params.alpha=0.3; + cmd_params.alpha=0.25; cmd_params.inputPath = std::filesystem::current_path().generic_string() + "/datasets/CoverType.txt"; cmd_params.outputPath = "results.txt"; cmd_params.algoType = SESAME::DBStreamType; From 722ec418a1e4b7096f6414c0aec1ae479e8dcf3b Mon Sep 17 00:00:00 2001 From: s1926539 Date: Tue, 21 Sep 2021 18:17:44 +0800 Subject: [PATCH 20/21] all fixed in DBStream --- src/Algorithm/DBStream.cpp | 16 ++++------------ src/Algorithm/WindowModel/DampedWindow.cpp | 2 +- test/SystemTest/DBStreamTest.cpp | 10 +++++----- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp index f7e34932..88ba8c96 100644 --- a/src/Algorithm/DBStream.cpp +++ b/src/Algorithm/DBStream.cpp @@ -77,7 +77,6 @@ void SESAME::DBStream::Initilize() { void SESAME::DBStream::update(PointPtr dataPoint){ double decayFactor=dampedWindow->decayFunction(this->pointArrivingTime, clock()); - // SESAME_INFO("decayFactor is..."<pointArrivingTime=clock(); this->microClusterNN=findFixedRadiusNN(dataPoint); std::vector::size_type sizeNN=microClusterNN.size(); @@ -92,9 +91,6 @@ void SESAME::DBStream::update(PointPtr dataPoint){ } else { for (int i = 0; i < sizeNN; i++) { - //SESAME_INFO("insert into existing MCs! id "<< microClusterNN.at(i)->id.front()); - // MicroClusterPtr microCluster = microClusterNN.at(i); - // SESAME_INFO(" decay F is "<insert(dataPoint,decayFactor); // just update weight for (int j = i + 1; j < sizeNN; j++) { MicroClusterPair microClusterPair(microClusterNN[i], microClusterNN.at(j)); @@ -109,8 +105,6 @@ void SESAME::DBStream::update(PointPtr dataPoint){ AdjustedWeightPtr adjustedWeight = SESAME::DataStructureFactory::createAdjustedWeight(1,this->pointArrivingTime); DensityGraph densityGraph( microClusterPair ,adjustedWeight); weightedAdjacencyList.insert(densityGraph); - // SESAME_INFO("new one weight is "<weight ); - // SESAME_INFO("size is"<(re, " ")); SESAME_INFO("RM list "< centroid(finalClusters.at(iter).front()->dimension,0); for(auto j=0; j!=finalClusters.at(iter).size();j++) { - // SESAME_INFO(finalClusters.at(iter)[j]->id.front()<<" MC weight is "<weight); double currentWeight=point->getWeight()+finalClusters.at(iter).at(j)->weight; point->setWeight(currentWeight); for(auto a =0;adimension;a++) @@ -224,10 +217,9 @@ void SESAME::DBStream::runOfflineClustering(DataSinkPtr sinkPtr) { } } } - SESAME_INFO("The NO."<getWeight()); std::stringstream re; std::copy(centroid.begin(),centroid.end(),std::ostream_iterator(re, " ")); - // SESAME_INFO("The NO."<put(point->copy()); // point index start from 0 } } @@ -317,8 +309,8 @@ void SESAME::DBStream::findConnectedComponents(){ //just used for examine reform ,need to delete later std::stringstream result; std::copy(idList.begin(),idList.end(),std::ostream_iterator(result, " ")); - SESAME_INFO("New formed macro cluster ... including micro cluster :"); - SESAME_INFO(" " << result.str() ); + //SESAME_INFO("New formed macro cluster ... including micro cluster :"); + //SESAME_INFO(" " << result.str() ); } } diff --git a/src/Algorithm/WindowModel/DampedWindow.cpp b/src/Algorithm/WindowModel/DampedWindow.cpp index 623cf860..1c13dd02 100644 --- a/src/Algorithm/WindowModel/DampedWindow.cpp +++ b/src/Algorithm/WindowModel/DampedWindow.cpp @@ -9,6 +9,6 @@ SESAME::DampedWindow::DampedWindow(double base, double lambda){ double SESAME::DampedWindow::decayFunction(clock_t startTime, clock_t currentTimestamp) const { - double elapsedTime = (int) ((currentTimestamp - startTime) / CLOCKS_PER_SEC); + double elapsedTime = (double) (currentTimestamp - startTime) / CLOCKS_PER_SEC; return pow(this->base, -1 * this->lambda * elapsedTime); } diff --git a/test/SystemTest/DBStreamTest.cpp b/test/SystemTest/DBStreamTest.cpp index 8145de58..9a4071f5 100644 --- a/test/SystemTest/DBStreamTest.cpp +++ b/test/SystemTest/DBStreamTest.cpp @@ -18,13 +18,13 @@ TEST(SystemTest, DBStreamTest) { setupLogging("benchmark.log", LOG_DEBUG); //Parse parameters. param_t cmd_params; - cmd_params.pointNumber = 1000; + cmd_params.pointNumber = 15120; cmd_params.dimension = 54; cmd_params.base=2; - cmd_params.lambda= 0.001; - cmd_params.radius= 1200; - cmd_params.cleanUpInterval=12; - cmd_params.weightMin=3; + cmd_params.lambda= 0.0001; + cmd_params.radius= 500; + cmd_params.cleanUpInterval=3; + cmd_params.weightMin=2; cmd_params.alpha=0.25; cmd_params.inputPath = std::filesystem::current_path().generic_string() + "/datasets/CoverType.txt"; cmd_params.outputPath = "results.txt"; From 55bb5522287948580caf51328d873a0c5e908429 Mon Sep 17 00:00:00 2001 From: s1926539 Date: Tue, 21 Sep 2021 19:38:26 +0800 Subject: [PATCH 21/21] reformat and recover other test --- include/Algorithm/DBStream.hpp | 17 +++++------------ .../DataStructure/WeightedAdjacencyList.hpp | 4 +--- test/CMakeLists.txt | 8 ++++---- 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp index 13a09939..5c17756d 100644 --- a/include/Algorithm/DBStream.hpp +++ b/include/Algorithm/DBStream.hpp @@ -4,28 +4,23 @@ #ifndef SESAME_INCLUDE_ALGORITHM_DBSTREAM_HPP_ #define SESAME_INCLUDE_ALGORITHM_DBSTREAM_HPP_ -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include +#include + namespace SESAME { typedef std::vector> Clusters; class DBStreamParams : public AlgorithmParameters { public: double radius; double lambda; - int cleanUpInterval;//Tgap + int cleanUpInterval;//Time gap double weightMin;//minimum weight double alpha;//α, intersection factor double base;//base of decay function }; + + class DBStream : public Algorithm { public: @@ -61,8 +56,6 @@ class DBStream : public Algorithm void insertIntoGraph(int microClusterId,int OtherId); void insertIntoGraph(int microClusterId); void findConnectedComponents(); - }; - } #endif //SESAME_INCLUDE_ALGORITHM_DBSTREAM_HPP_ diff --git a/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp b/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp index d4540f43..029f3422 100644 --- a/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp +++ b/include/Algorithm/DataStructure/WeightedAdjacencyList.hpp @@ -5,14 +5,12 @@ #ifndef SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_WEIGHTEDADJACENCYLIST_HPP_ #define SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_WEIGHTEDADJACENCYLIST_HPP_ #include -#include -#include #include -#include #include #include #include #include + namespace SESAME { struct MicroClusterPair; typedef std::shared_ptr MicroClusterPairPtr; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index edc6fca2..dcbb5001 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,9 +1,9 @@ # adding the Google_Tests_run target add_executable(Google_Tests_run - #SystemTest/CluStreamTest.cpp - #SystemTest/StreamKMTest.cpp - #SystemTest/BirchTest.cpp - #SystemTest/DenStreamTest.cpp + SystemTest/CluStreamTest.cpp + SystemTest/StreamKMTest.cpp + SystemTest/BirchTest.cpp + SystemTest/DenStreamTest.cpp SystemTest/DBStreamTest.cpp )