From ed3b38fae2ff2a9a5beabe4eb29593fe3c56ac52 Mon Sep 17 00:00:00 2001 From: Gabriel WU <45706792+gabrielwunr@users.noreply.github.com> Date: Mon, 13 Dec 2021 18:14:42 +0800 Subject: [PATCH] Dstream finish (#79) * ReWrite DStream based on MOA * ReWrite DStream based on MOA * ReWrite DStream based on MOA, left Debug after refactor all algorithms * Evaluation wx (#70) * add purity * add preparations for CMM, still have problem about driver * CMM done, but need to remove hard code * still need to change voteMap function * purity wx (#71) * add purity * add preparations for CMM, still have problem about driver * CMM done, but need to remove hard code * still need to change voteMap function * fix bug of purity * Update README.md * Add Outlier Detection Part (#72) * Update README.md * pull request from DataSource-yzh to main (#74) * Updated DataSource * Removed hard-code in DataSource::load * Modified timer and removed hard-coded queue initialization Co-authored-by: Zhonghao Yang Co-authored-by: Zhonghao Yang * Fix timer (#75) * Construct Time measurement * Constructing Time measurement * Re write timer hpp * Construct Time measurement * Constructing Time measurement * Re write timer hpp * Finish timer, need final debug ... X_X * A LITTLE BUG IN OUTLIER DETECTION PART, NEED TO ADJUST ACCUMULATE TIMER * turn accumulate timer into more simple one * turn accumulate timer into more simple one * fix all * Subtract connection based offline refinement. which can be used later in refactor part (#76) * modify DStream * FU DStream, no bugs, performance is terrible * FU DStream, no bugs, performance now OK * ReWrite DStream based on MOA, left Debug after refactor all algorithms * FU DStream, no bugs, performance now OK * ReWrite DStream based on MOA, left Debug after refactor all algorithms * delete most of the print * ReWrite DStream based on MOA, left Debug after refactor all algorithms * delete most of the print * ReWrite DStream based on MOA, left Debug after refactor all algorithms * delete most of the print * ReWrite DStream based on MOA * ReWrite DStream based on MOA * ReWrite DStream based on MOA, left Debug after refactor all algorithms * modify DStream * FU DStream, no bugs, performance is terrible * FU DStream, no bugs, performance now OK * ReWrite DStream based on MOA, left Debug after refactor all algorithms * FU DStream, no bugs, performance now OK * recover test cmake Co-authored-by: tuidan <40883104+tuidan@users.noreply.github.com> Co-authored-by: Tony Co-authored-by: Zhonghao Yang <62000831+zhonghao-yang@users.noreply.github.com> Co-authored-by: Zhonghao Yang Co-authored-by: Zhonghao Yang Former-commit-id: 609681d41bfe9cda9764f7b5f25961302d73f58e --- include/Algorithm/Algorithm.hpp | 2 +- include/Algorithm/DBStream.hpp | 2 +- include/Algorithm/DStream.hpp | 96 ++ .../DataStructure/CharacteristicVector.hpp | 86 ++ .../Algorithm/DataStructure/DensityGrid.hpp | 105 ++ .../Algorithm/DataStructure/GridCluster.hpp | 91 ++ include/Utils/BenchmarkUtils.hpp | 6 +- src/Algorithm/AlgorithmFactory.cpp | 5 + src/Algorithm/CMakeLists.txt | 1 + src/Algorithm/CluStream.cpp | 2 +- src/Algorithm/DBStream.cpp | 6 +- src/Algorithm/DStream.cpp | 1045 +++++++++++++++++ src/Algorithm/DataStructure/CMakeLists.txt | 3 + .../DataStructure/CharacteristicsVector.cpp | 89 ++ src/Algorithm/DataStructure/DensityGrid.cpp | 88 ++ src/Algorithm/DataStructure/GridCluster.cpp | 239 ++++ .../OfflineClustering/ConnectedRegions.cpp | 3 +- src/Algorithm/WindowModel/LandmarkWindow.cpp | 4 +- test/CMakeLists.txt | 1 + test/SystemTest/CluStreamTest.cpp | 18 +- test/SystemTest/DBStreamTest.cpp | 10 +- test/SystemTest/DStreamTest.cpp | 47 + test/SystemTest/DenStreamTest.cpp | 10 +- 23 files changed, 1929 insertions(+), 30 deletions(-) create mode 100644 include/Algorithm/DStream.hpp create mode 100644 include/Algorithm/DataStructure/CharacteristicVector.hpp create mode 100644 include/Algorithm/DataStructure/DensityGrid.hpp create mode 100644 include/Algorithm/DataStructure/GridCluster.hpp create mode 100644 src/Algorithm/DStream.cpp create mode 100644 src/Algorithm/DataStructure/CharacteristicsVector.cpp create mode 100644 src/Algorithm/DataStructure/DensityGrid.cpp create mode 100644 src/Algorithm/DataStructure/GridCluster.cpp create mode 100644 test/SystemTest/DStreamTest.cpp diff --git a/include/Algorithm/Algorithm.hpp b/include/Algorithm/Algorithm.hpp index 6d9f58a0..ad85c403 100644 --- a/include/Algorithm/Algorithm.hpp +++ b/include/Algorithm/Algorithm.hpp @@ -14,7 +14,7 @@ using namespace std; namespace SESAME { -enum algoType { BirchType, StreamKMeansType, CluStreamType, DenStreamType, DBStreamType, EDMStreamType}; +enum algoType { BirchType, StreamKMeansType, CluStreamType, DenStreamType, DBStreamType, EDMStreamType, DStreamType}; class Algorithm; typedef std::shared_ptr AlgorithmPtr; diff --git a/include/Algorithm/DBStream.hpp b/include/Algorithm/DBStream.hpp index b7ab8ac0..95420b14 100644 --- a/include/Algorithm/DBStream.hpp +++ b/include/Algorithm/DBStream.hpp @@ -43,7 +43,7 @@ class DBStream : public Algorithm // unordered_map> connecvtivityGraphId; - //TODO Need to implement weighted a weighted adjacency list S + DBStream(param_t &cmd_params); ~DBStream(); void Initilize() override; diff --git a/include/Algorithm/DStream.hpp b/include/Algorithm/DStream.hpp new file mode 100644 index 00000000..43e7ddc1 --- /dev/null +++ b/include/Algorithm/DStream.hpp @@ -0,0 +1,96 @@ +// +// Created by 1124a on 2021/10/27. +// + +#ifndef SESAME_INCLUDE_ALGORITHM_DSTREAM_HPP_ +#define SESAME_INCLUDE_ALGORITHM_DSTREAM_HPP_ +#include +#include +#include +#include +#include + + +namespace SESAME{ +class DStream; +class DStreamParams : public AlgorithmParameters { + public: + // User defined parameter lambda in damped window + double lambda; + /* User defined parameter: Adjusts the window of protection for + * renaming previously deleted grids as being sporadic + * */ + double beta; + double cm; // User defined parameter: Controls the threshold for dense grids + double cl; // User defined parameter: Controls the threshold for sparse grids + int gridWidth; //width of grid +}; + +typedef std::unordered_map HashMap; +class DStream : public Algorithm + { + public: + DStreamParams dStreamParams; + DampedWindowPtr dampedWindow; + clock_t startTime; + clock_t pointArrivingTime; + clock_t lastAdjustTime; + int gap;// Time gap between calls to the offline component + double dm;// Density threshold for dense grids; controlled by cm + double dl; // Density threshold for sparse grids; controlled by cl + int NGrids;//The number of density grids ,with an initial value 0 + + + //TODO Split the grid list from DStream + HashMap gridList; + std::unordered_map deletedGrids; + std::vector clusterList; // A list of all Grid Clusters + std::vector newClusterList; //A list of grid clusters used when re-clustering an existing cluster. + std::vector minVals; //The minimum value seen for a numerical dimension; used to calculate N + std::vector maxVals; //The maximum value seen for a numerical dimension; used to calculate N + + DStream(param_t &cmd_params); + ~DStream(); + void Initilize() override; + void runOnlineClustering(PointPtr input) override; + void runOfflineClustering(DataSinkPtr sinkPtr) override; + private: + bool clusterInitial = false; + bool isInitial = false; + bool recalculateN =false; // flag indicating whether N needs to be recalculated after this instance + std::vector tempCoord; + std::vector Coord; + void ifReCalculateN(PointPtr point); + void reCalculateN(); + + void GridListUpdate(std::vector coordinate); + + + void initialClustering(); + + void adjustClustering(); + + + bool adjustLabels(); + bool inspectChangedGrids(); + HashMap adjustForSparseGrid(DensityGrid grid, CharacteristicVector characteristicVec, int gridClass); + HashMap adjustForDenseGrid(DensityGrid grid, CharacteristicVector characteristicVec, int gridClass); + HashMap adjustForTransitionalGrid(DensityGrid grid, CharacteristicVector characteristicVec, int gridClass); + void removeSporadic(); + + + HashMap reCluster (GridCluster gridCluster); + HashMap adjustNewLabels(HashMap newGridList); + void mergeClusters(int smallCluster, int bigCluster); + void cleanClusters(); + HashMap cleanNewClusters(HashMap newGridList); + HashMap mergeNewClusters(HashMap newGridList, int smallCluster, int bigCluster); + double densityThresholdFunction(clock_t tg, double cl, double decayFactor, int NGrids); + bool checkIfSporadic(CharacteristicVector characteristicVec); + void updateGridListDensity(); + static void mergeGridList(HashMap gridList, const HashMap &otherList); + }; + +} + +#endif //SESAME_INCLUDE_ALGORITHM_DSTREAM_HPP_ diff --git a/include/Algorithm/DataStructure/CharacteristicVector.hpp b/include/Algorithm/DataStructure/CharacteristicVector.hpp new file mode 100644 index 00000000..1835ce6e --- /dev/null +++ b/include/Algorithm/DataStructure/CharacteristicVector.hpp @@ -0,0 +1,86 @@ +// +// Created by 1124a on 2021/10/27. +// + +#ifndef SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_CHARACTERISTICVECTOR_HPP_ +#define SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_CHARACTERISTICVECTOR_HPP_ +#include +#include +#include +namespace SESAME{ +enum Status{ + NO_CLASS= -1,SPARSE,TRANSITIONAL ,DENSE +}; +class CharacteristicVector { + public: + /** + * t_g: The last time when g is updated + */ + clock_t updateTime; + + /** + * tm : last time when g is removed from grid_list as a sporadic grid (if ever). + */ + clock_t removeTime; + + /** + * D: the grid density at the last update + */ + double gridDensity; + + /** + * label: the cluster label of the grid + */ + int label; + + /** + * status: status = {SPORADIC, NORMAL} + */ + bool isSporadic; + + /** + * attribute: attribute = {SPARSE, TRANSITIONAL, DENSE} + */ + int attribute; + + /** + * time stamp at which the grid's density was last updated (including initial and adjust clustering) + */ + clock_t densityUpdateTime; + + /** + * Flag marking whether there was a change in the attribute field + * the last time the grid density was updated. + */ + bool attChange; + bool isVisited=false; + + CharacteristicVector(); + CharacteristicVector(clock_t updateTime, clock_t removeTime, double Density, int label, bool status, double dl, double dm); + double getCurrGridDensity(clock_t NowTime, double lambda); + + bool isSparse(double dl); + bool isDense(double dm); + bool isTransitional(double dm, double dl); + /** + * Implements the density update function given in + * eq 5 (Proposition 3.1) of Chen and Tu 2007. + * + * @param currTime the data stream's current internal time + * @param decayFactor the value of lambda + */ + void densityWithNew(clock_t NowTime, double decayFactor); + /** + * Implements the update the density of all grids step given at line 2 of + * both Fig 3 and Fig 4 of Chen and Tu 2007. + * + * @param currTime the data stream's current internal time + * @param decayFactor the value of lambda + * @param dl the threshold for sparse grids + * @param dm the threshold for dense grids + */ + void UpdateAllDensity(clock_t NowTime, double decayFactor, double dl, double dm); + void ChangeAttribute(double dl, double dm); +}; +} +#endif //SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_CHARACTERISTICVECTOR_HPP_ diff --git a/include/Algorithm/DataStructure/DensityGrid.hpp b/include/Algorithm/DataStructure/DensityGrid.hpp new file mode 100644 index 00000000..1f976a56 --- /dev/null +++ b/include/Algorithm/DataStructure/DensityGrid.hpp @@ -0,0 +1,105 @@ +// +// Created by 1124a on 2021/10/27. +// +#ifndef SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_DENSITYGRID_HPP_ +#define SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_DENSITYGRID_HPP_ +#include +#include +#include +#include +#include +#include + + +namespace SESAME{ + +class DensityGrid; +typedef std::shared_ptr DensityGridPtr; + +class DensityGrid{ + public: + /** + * For each dimension, its space Si, i =1, ··· ,d is divided into pi partitions as + * Si = Si,1 U Si,2 U ··· U Si,pi + * A density grid g that is composed of S1,j1 ×S2,j2 ···×Sd,jd , ji =1, ...,pi, + * has coordinates (j1,j2, ··· ,jd). + */ + std::vector coordinates; + /** + * The value of 'd' for the d-dimensional space S considered by D-Stream. + */ + int dimensions; + + /** + * Flag denoting whether this density grid has been inspected during the adjustClustering() + * step of D-Stream. + */ + bool isVisited; + /** + * A constructor method for a density grid + * + * @param c the coordinates of the density grid + */ + DensityGrid(); + DensityGrid(std::vector coordin); + + /** + * A constructor method for a density grid + * + * @param dg the density grid to copy + */ + DensityGrid(DensityGrid const &grid); + /** + * Generates a vector of neighbours for this density grid by varying each coordinate + * by one in either direction. Does not test whether the generated neighbours are valid as + * DensityGrid is not aware of the number of partitions in each dimension. + * + * @return a vector of neighbours for this density grid + */ + std::vector getNeighbours(); + + /** + * Provides the probability of the argument instance belonging to the density grid in question. + * + * @return 1.0 if the instance equals the density grid's coordinates; 0.0 otherwise. + */ + + double getInclusionProbability(Point point); + + bool operator==( DensityGrid& gridOther)const; + + + + +}; +struct GridKeyHash{ + std::size_t operator()(const DensityGrid &densityGrid) const + { + //int[] primes = {31, 37, 41, 43, 47, 53, 59}; + int hc = 1; + for (int i = 0 ; i < densityGrid.dimensions ; i++) + { + hc = (hc * 31) + densityGrid.coordinates[i]; + } + + return hc; + } +}; + +struct EqualGrid + { + bool operator() (const DensityGrid &densityGrid1, const DensityGrid &densityGrid2) const + { + if(densityGrid1.dimensions != densityGrid2.dimensions) + return false; + for(int i = 0 ; i < densityGrid1.dimensions ; i++) + { + if(densityGrid1.coordinates[i] != densityGrid2.coordinates[i]) + return false; + } + return true; + } + }; + +} +#endif //SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_DENSITYGRID_HPP_ diff --git a/include/Algorithm/DataStructure/GridCluster.hpp b/include/Algorithm/DataStructure/GridCluster.hpp new file mode 100644 index 00000000..fec600c5 --- /dev/null +++ b/include/Algorithm/DataStructure/GridCluster.hpp @@ -0,0 +1,91 @@ +// +// Created by 1124a on 2021/10/27. +// + +#ifndef SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_GRIDCLUSTER_HPP_ +#define SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_GRIDCLUSTER_HPP_ +#include +#include +#include +#include +namespace SESAME{ +class GridCluster; +typedef std::unordered_map HashGrids; +class GridCluster{ + public: + HashGrids grids; + HashGrids visited; + int clusterLabel; + //Initialize + GridCluster(int label); + GridCluster(); + GridCluster(HashGrids hashMap, int label); + /** + * @param grid the density grid to add to the cluster + */ + void addGrid(DensityGrid grid); + + + /** + * @param dg the density grid to remove from the cluster + */ + void removeGrid(DensityGrid grid); + + /** + * @param gridClus the GridCluster to be absorbed into this cluster + */ + void absorbCluster(GridCluster gridCluster); + /** + * Inside Grids are defined in Definition 3.5 of Chen and Tu 2007 as: + * Consider a grid group G and a grid g ∈ G, suppose g =(j1, ··· ,jd), if g has + * neighboring grids in every dimension i =1, ·· · ,d, then g is an inside grid + * in G.Otherwise g is an outside grid in G. + * + * @param grid the density grid to label as being inside or out + * @return TRUE if g is an inside grid, FALSE otherwise + */ + bool isInside(DensityGrid grid); + + + + /** + * Inside Grids are defined in Definition 3.5 of Chen and Tu 2007 as: + * Consider a grid group G and a grid g ∈ G, suppose g =(j1, ··· ,jd), if g has + * neighboring grids in every dimension i =1, ·· · ,d, then g is an inside grid + * in G. Otherwise g is an outside grid in G. + * + * @param grid the density grid being labelled as inside or outside + * @param other the density grid being proposed for addition + * @return TRUE if g would be an inside grid, FALSE otherwise + */ + bool isInside(DensityGrid grid, DensityGrid other); + + + + /** + * Tests a grid cluster for connectedness according to Definition 3.4, Grid Group, from + * Chen and Tu 2007. + * + * Selects one density grid in the grid cluster as a starting point and iterates repeatedly + * through its neighbours until no more density grids in the grid cluster can be visited. + * + * @return TRUE if the cluster represent one single grid group; FALSE otherwise. + */ + + bool isConnected(); + + + /** + * Iterates through the DensityGrids in the cluster and calculates the inclusion probability for each. + * + * @return 1.0 if instance matches any of the density grids; 0.0 otherwise. + */ + double getInclusionProb(Point point); + bool operator==( GridCluster& Other)const; +}; + + +} + + +#endif //SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_GRIDCLUSTER_HPP_ diff --git a/include/Utils/BenchmarkUtils.hpp b/include/Utils/BenchmarkUtils.hpp index e2e03af0..3610548b 100644 --- a/include/Utils/BenchmarkUtils.hpp +++ b/include/Utils/BenchmarkUtils.hpp @@ -43,7 +43,7 @@ struct param_t { double base; double lambda; double mu; - double beta; + double beta; //Also used in DStream, but different meaning // EDMStream double a; int cacheNum; @@ -55,6 +55,10 @@ struct param_t { int cleanUpInterval; double weightMin; double alpha; + //used in DStream + double cm; + double cl; + int gridWidth; int datasetOption; std::string inputPath; diff --git a/src/Algorithm/AlgorithmFactory.cpp b/src/Algorithm/AlgorithmFactory.cpp index c1a9daca..26199c78 100644 --- a/src/Algorithm/AlgorithmFactory.cpp +++ b/src/Algorithm/AlgorithmFactory.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include SESAME::AlgorithmPtr SESAME::AlgorithmFactory::create(param_t &cmd_params) { @@ -37,6 +38,10 @@ SESAME::AlgorithmPtr SESAME::AlgorithmFactory::create(param_t &cmd_params) { shared_ptr dbStream = std::make_shared(cmd_params); return (SESAME::AlgorithmPtr) dbStream; } + if (cmd_params.algoType == DStreamType) { + shared_ptr dStream = std::make_shared(cmd_params); + return (SESAME::AlgorithmPtr) dStream; + } throw std::invalid_argument("Unsupported"); } diff --git a/src/Algorithm/CMakeLists.txt b/src/Algorithm/CMakeLists.txt index 2b34a2f6..b31c30e4 100644 --- a/src/Algorithm/CMakeLists.txt +++ b/src/Algorithm/CMakeLists.txt @@ -5,6 +5,7 @@ add_source_sesame( DenStream.cpp EDMStream.cpp DBStream.cpp + DStream.cpp Algorithm.cpp AlgorithmFactory.cpp ) diff --git a/src/Algorithm/CluStream.cpp b/src/Algorithm/CluStream.cpp index 31a954f2..f93e4c13 100644 --- a/src/Algorithm/CluStream.cpp +++ b/src/Algorithm/CluStream.cpp @@ -15,8 +15,8 @@ */ SESAME::CluStream::CluStream(param_t &cmd_params) { this->CluStreamParam.pointNumber = cmd_params.pointNumber; - this->CluStreamParam.clusterNumber = cmd_params.onlineClusterNumber; this->CluStreamParam.dimension = cmd_params.dimension; + this->CluStreamParam.clusterNumber = cmd_params.onlineClusterNumber; this->CluStreamParam.lastArrivingNum = cmd_params.lastArrivingNum; this->CluStreamParam.timeWindow = cmd_params.timeWindow; this->CluStreamParam.timeInterval = cmd_params.timeInterval; diff --git a/src/Algorithm/DBStream.cpp b/src/Algorithm/DBStream.cpp index e56fe2af..73bb39de 100644 --- a/src/Algorithm/DBStream.cpp +++ b/src/Algorithm/DBStream.cpp @@ -175,10 +175,10 @@ void SESAME::DBStream::cleanUp(clock_t nowTime){ microClusters.erase(microClusters.begin()+int(iter));//Delete this MC from current MC list } } - SESAME_INFO("now rm MCs number is "<(re, " ")); - SESAME_INFO("RM list "< +#include +#include + +SESAME::DStream::DStream(param_t &cmd_params){ + this->dStreamParams.pointNumber = cmd_params.pointNumber; + this->dStreamParams.dimension = cmd_params.dimension; + this->dStreamParams.lambda=cmd_params.lambda; + this->dStreamParams.cm=cmd_params.cm; + this->dStreamParams.cl=cmd_params.cl; + this->dStreamParams.beta=cmd_params.beta; + this->dStreamParams.gridWidth =cmd_params.gridWidth; +} + +SESAME::DStream:: ~DStream() += default; + +void SESAME::DStream::Initilize() { + this->dampedWindow = WindowFactory::createDampedWindow(dStreamParams.lambda, 1 ); + this->startTime = clock(); + this->pointArrivingTime= clock(); + this->lastAdjustTime= clock(); + //this->dl = dStreamParams.cl/(this->NGrids * (1.0 - dStreamParams.lambda)); + // this->dm = dStreamParams.cm/(this->NGrids * (1.0 - dStreamParams.lambda)); + //Calculate the value for gap using the method defined in eq 26 of Chen and Tu 2007 + // double optionA = dStreamParams.cl/dStreamParams.cm; + // double optionB = ((double)this->NGrids-dStreamParams.cm)/((double)this->NGrids-dStreamParams.cl); +// this->gap = (int)floor(log(max(optionA, optionB))/log(dStreamParams.lambda)); + // Ensure that gap is not zero (i.e. if the procedure to calculate gap rounds down to zero, + // then set gap to 1 and adjust clustering every instance) + // if(this->gap == 0) + // this->gap = 1; + this->gap = 1; + this->dm = -1.0; + this->dl = -1.0; + this->NGrids = 1; + // SESAME_INFO(" A is " <isInitial = true; + for(int i = 0 ; i < dStreamParams.dimension ; i++) + { + maxVals[i] = (int)input->getFeatureItem(i); + minVals[i] = (int)input->getFeatureItem(i); + recalculateN = true; + } + } + else + { + this->pointArrivingTime=clock(); + ifReCalculateN(input); + if (recalculateN) + { + reCalculateN(); + + GridListUpdate(Coord);//tempCoord + // 5. If tc == gap, then initial clustering + // and + // 6. If tc mod gap == 0, then: + //Detect and remove sporadic grids from grid_list then adjust clustering + if ((clock()-startTime)/CLOCKS_PER_SEC/gap==0 &&!clusterInitial) + { + lastAdjustTime=clock(); + initialClustering(); + } + //if(((clock()-lastAdjustTime)/CLOCKS_PER_SEC)>=gap) + if(clusterInitial&&(clock()-startTime)/CLOCKS_PER_SEC%(gap*10)==0) + { + lastAdjustTime=clock(); + removeSporadic(); + adjustClustering(); + } + } + } + } + + +void SESAME::DStream::runOfflineClustering(DataSinkPtr sinkPtr) +{ + SESAME_INFO(" cluster list size "< points; + for( auto iter=0; iter!=this->clusterList.size();iter++) + { + PointPtr point = DataStructureFactory::createPoint(iter, 0, dStreamParams.dimension, 0); + auto count=0; + for(auto &iterGrid: this->clusterList.at(iter).grids) + { + for(int iterDim=0; iterDimsetFeatureItem(0,iterDim); + point->setFeatureItem(point->getFeatureItem(iterDim)+iterGrid.first.coordinates[iterDim],iterDim); + if(count== this->clusterList.at(iter).grids.size()-1) + { + point->setFeatureItem(point->getFeatureItem(iterDim)/ dStreamParams.dimension,iterDim); + } + + } + double weight= gridList.find(iterGrid.first)->second.gridDensity; + point->setWeight(point->getWeight()+weight); + count++; + } + points.push_back(point); + } + for(auto & point : points) + sinkPtr->put(point->copy()); +} + +void SESAME::DStream::ifReCalculateN(PointPtr point) +{ + for (int i = 0 ; i < dStreamParams.dimension; i++) + { + tempCoord[i] = point->getFeatureItem(i); + if (tempCoord[i] > maxVals[i]) + { + maxVals[i] = tempCoord[i]; + recalculateN = true; + } + else if ( tempCoord[i]> minVals[i] ) + { + minVals[i] = tempCoord[i]; + recalculateN = true; + } + Coord[i]= point->getFeatureItem(i)/dStreamParams.gridWidth; + + } +} +void SESAME::DStream::reCalculateN(){ + int curGridNumber = 1; + for (int i = 0 ; i < dStreamParams.dimension ; i++) + { + int gridNum= (maxVals[i]-minVals[i])/dStreamParams.gridWidth; + if(gridNum<=0) + gridNum=1; + curGridNumber = curGridNumber * gridNum; + } + double dlBack= dStreamParams.cl/( curGridNumber * (1.0 - dStreamParams.lambda)); + double dmBack= dStreamParams.cm/(curGridNumber * (1.0 - dStreamParams.lambda)); + if(dlBack<0) + return; + else + { + this->dl = dlBack; + this->dm = dmBack; + this->NGrids = curGridNumber; + // SESAME_INFO(" dl = " << this->dl << ", dm = " << this->dm); + // SESAME_INFO("TOTAL GRIDS ARE "<NGrids); + //Calculate the value for gap using the method defined in eq 26 of Chen and Tu 2007 + double optionA = dStreamParams.cl/dStreamParams.cm; + double optionB = ((double)this->NGrids-dStreamParams.cm)/((double)this->NGrids-dStreamParams.cl); + gap = (int)floor(log(max(optionA, optionB))/log(dStreamParams.lambda)); + // Ensure that gap is not zero (i.e. if the procedure to calculate gap rounds down to zero, + // then set gap to 1 and adjust clustering every instance) + if(gap == 0) + { + gap = 1; + } + } + +} + +/* Update the grid list of DStream when data inserting into the grid + * */ +void SESAME::DStream::GridListUpdate(std::vector coordinate){ + CharacteristicVector characteristicVec; + DensityGrid grid(coordinate); + // 3. If (g not in grid_list) insert dg to grid_list + if(this->gridList.find(grid)==gridList.end()) + { + // SESAME_INFO("3 - this grid wasn't in grid list!"); + if(this->deletedGrids.find(grid)!=deletedGrids.end()) + { + //SESAME_INFO(" but it was in deleted grids!"); + characteristicVec = CharacteristicVector (pointArrivingTime, this->deletedGrids.find(grid)->second, + 1.0, -1, false, dl, dm); + this->deletedGrids.erase(grid); + } + //TODO CHANGR REMOVE TIME FROM 0 TO -1 + else + characteristicVec = CharacteristicVector(pointArrivingTime, -1, 1.0, -1, false, dl, dm); + this->gridList.insert(std::make_pair(grid, characteristicVec)); + // SESAME_INFO(" The size of grid_list is now "<gridList.find(grid)->second; + characteristicVec.densityWithNew(pointArrivingTime, dStreamParams.lambda); + characteristicVec.updateTime=pointArrivingTime; + this->gridList.find(grid)->second=characteristicVec; + } + +} + + + +/** + * Implements the procedure given in Figure 3 of Chen and Tu 2007 + */ + void SESAME::DStream::initialClustering() { + SESAME_INFO("INITIAL CLUSTERING CALLED"); + + // 1. Update the density of all grids in grid_list + updateGridListDensity(); + // 2. Assign each dense grid to a distinct cluster + // and + // 3. Label all other grids as NO_CLASS + auto gridIter = this->gridList.begin(); + HashMap newGridList; + while(gridIter!=gridList.end()) + { + DensityGrid grid = gridIter->first; + CharacteristicVector characteristicVecOfG = gridIter->second; + + if(characteristicVecOfG.attribute == DENSE) + { + int gridClass = this->clusterList.size(); + characteristicVecOfG.label=gridClass; + GridCluster gridCluster = GridCluster(gridClass); + gridCluster.addGrid(grid); + if(std::find(this->clusterList.begin(),this->clusterList.end(),gridCluster)==this->clusterList.end()) + this->clusterList.push_back(gridCluster); + // SESAME_INFO(" was dense (class "<clusterList.size()); + } + else + characteristicVecOfG.label=NO_CLASS; + newGridList.insert(std::make_pair(grid,characteristicVecOfG)); + gridIter++; + } + this->gridList = newGridList; + // 4. Make changes to grid labels by doing: + // a. For each cluster c + // b. For each outside grid g of c + // c. For each neighbouring grid h of g + // d. If h belongs to c', label c and c' with + // the label of the largest cluster + // e. Else if h is transitional, assign it to c + // f. While changes can be made + + bool changesMade; + + do{ + changesMade = adjustLabels(); + }while(changesMade); // while changes are being made + SESAME_INFO("INITIAL CLUSTERING FINISHED"); + clusterInitial = true; +} +/** + * Makes first change available to it by following the steps: + * For each cluster c< + * For each outside grid g of c + * For each neighbouring grid h of g + * If h belongs to c', label c and c' with the label of the largest cluster + * Else if h is transitional, assign it to c + * @return TRUE if a change was made to any cluster's labels, FALSE otherwise + */ +bool SESAME::DStream::adjustLabels() +{ + //bool adjust=false; + // a. For each cluster c + for (GridCluster& gridCluster : this->clusterList) + { + // SESAME_INFO("Adjusting from cluster "<first; + bool inside = gridIter->second; + // SESAME_INFO(" Inspecting density grid, grid, standby..."); + + // b. for each OUTSIDE grid of cluster + if (!inside) + { + // SESAME_INFO(" Density grid dg is outside!"); + // c. for each neighbouring grid, of current iter grid + for (const DensityGrid& gridNeighbourhood : grid.getNeighbours()) + { + if(this->gridList.find(gridNeighbourhood)!= gridList.end()) + { + CharacteristicVector characteristicVec1 = this->gridList.find(grid)->second; + CharacteristicVector characteristicVec2 = this->gridList.find(gridNeighbourhood)->second; + //System.out.print(" 1: "+cv1.toString()+", 2: "+cv2.toString()); + int class1 = characteristicVec1.label; + int class2 = characteristicVec2.label; + // ...and if neighbouring grid isn't already in the same cluster as grid... + if (class1 != class2) + { + // If neighbouring grid is in cluster c', merge c and c' into the larger of the two + if (class2 != NO_CLASS) + { + if ( this->clusterList.at(class1).grids.size() < this->clusterList.at(class2).grids.size()) + mergeClusters(class1, class2); + else + mergeClusters(class2, class1); + return true; + } + // If gridNeighbourhood is transitional and 'outside' of the cluster, assign it to cluster + else if (characteristicVec2.isTransitional(dm, dl)) + { + //SESAME_INFO("this grid is transitional and is assigned to cluster "<clusterList.at(class1) = gridCluster; + if(this->gridList.find(grid)!=gridList.end()) + this->gridList.find(grid)->second=characteristicVec2; + else + this->gridList.insert(std::make_pair(grid, characteristicVec2)); + return true; + } + } + } + } + } + } + } + } + return false; +} + + + + +/** + * Iterates through grid_list and updates the density for each density grid therein. + * Also marks each density grid as unvisited for this call to adjustClustering. + */ + void SESAME::DStream::updateGridListDensity() +{ + // SESAME_INFO("grid list size is "<gridList.size()); + HashMap gridListBack; + for (auto & iter : this->gridList) + { + DensityGrid grid = iter.first; + CharacteristicVector cvOfGrid = iter.second; + // grid.isVisited = false; + iter.second.isVisited=false; + cvOfGrid.UpdateAllDensity(pointArrivingTime, dStreamParams.lambda, dl, dm); + iter.second = cvOfGrid; + gridListBack.insert(std::make_pair(grid, cvOfGrid)); + } + //this->gridList.clear(); + this->gridList=gridListBack; + //SESAME_INFO("grid list size is "<gridList.size()); +} + + +/** + * Performs the periodic adjustment of clusters every 'gap' timesteps. + * Implements the procedure given in Figure 4 of Chen and Tu 2007 + * + * @see moa.clusterers.dstream.Dstream#gap + */ +void SESAME::DStream::adjustClustering() { + SESAME_INFO("ADJUST CLUSTERING CALLED "); + // 1. Update the density of all grids in grid_list + updateGridListDensity(); + // 2. For each grid dg whose attribute is changed since last call + // a. If dg is sparse + // b. If dg is dense + // c. If dg is transitional + bool changesMade = false; + do{ + changesMade=inspectChangedGrids(); + }while(changesMade); + +} + + +/** + * Inspects each density grid in grid_list whose attribute has changed since the last + * call to adjustClustering. Implements lines 3/4/7/19 of the procedure given in Figure + * 4 of Chen and Tu 2007. + * + * @return TRUE if any grids are updated; FALSE otherwise. + */ +bool SESAME::DStream::inspectChangedGrids(){ + + HashMap newGridList; + auto gridIter = this->gridList.begin(); + int a=0; + while (gridIter!=gridList.end() )//&& newGridList.empty() + { + DensityGrid grid = gridIter->first; + CharacteristicVector characteristicVec = gridIter->second; + int gridClass =characteristicVec.label; + if(characteristicVec.attChange && !characteristicVec.isVisited)//grid.isVisited + { //grid.isVisited=true; + gridIter->second.isVisited=true; + // SESAME_INFO(a<<"th visit!"<second.isVisited); + newGridList.insert(std::make_pair(grid, characteristicVec)); + if (characteristicVec.attribute == SPARSE) + mergeGridList(newGridList,adjustForSparseGrid(grid, characteristicVec, gridClass)); + else if (characteristicVec.attribute == DENSE) + mergeGridList(newGridList,adjustForDenseGrid(grid, characteristicVec, gridClass)); + else // TRANSITIONAL + mergeGridList(newGridList,adjustForTransitionalGrid(grid, characteristicVec, gridClass)); + } + gridIter++;a++; + } + SESAME_INFO("Inspect changes in grids "<gridList,newGridList); + cleanClusters(); + return true; + } + else + return false; +} + +/** + * Adjusts the clustering of a sparse density grid. Implements lines 5 and 6 from Figure 4 of Chen and Tu 2007. + * + * @param dg the sparse density grid being adjusted + * @param cv the characteristic vector of dg + * @param dgClass the cluster to which dg belonged + * + * @return a HashMap containing density grids for update after this iteration + */ +SESAME::HashMap SESAME::DStream::adjustForSparseGrid(DensityGrid grid, + CharacteristicVector characteristicVec, int gridClass) +{ + HashMap newGridList; + //System.out.print("Density grid "+dg.toString()+" is adjusted as a sparse grid at time "+this.getCurrTime()+". "); + if (gridClass != NO_CLASS) + { + //SESAME_INFO("It is removed from cluster "<clusterList.at(gridClass); + gridCluster.removeGrid(grid); + characteristicVec.label = NO_CLASS; + newGridList.insert(std::make_pair(grid, characteristicVec)); + this->clusterList.at(gridClass)= gridCluster; + if(!gridCluster.grids.empty() && !gridCluster.isConnected()) + mergeGridList(newGridList,reCluster(gridCluster)); + } + //else + //System.out.println("It was not clustered ("+dgClass+")."); + return newGridList; +} + + +/** +* Reclusters a grid cluster into two (or more) constituent clusters when it has been identified that the original cluster +* is no longer a grid group. It does so by echoing the initial clustering procedure over only those grids in gc. +* @param gridCluster the grid cluster to be re clustered +* @return a HashMap containing density grids for update after this iteration +*/ +SESAME::HashMap SESAME::DStream::reCluster(GridCluster gridCluster) +{ + SESAME_INFO("Now re-cluster!"); + HashMap newGridList; + auto gcIter = gridCluster.grids.begin(); + // SESAME_INFO("ReCluster called for cluster "<first; + CharacteristicVector characteristicVecOfGrid = this->gridList.find(grid)->second; + if(characteristicVecOfGrid.attribute == DENSE) + { + int gridClass = (int) newClusterList.size(); + characteristicVecOfGrid.label = gridClass; + GridCluster newCluster(gridClass); + newCluster.addGrid(grid); + newClusterList.push_back(newCluster); + } + else + characteristicVecOfGrid.label = NO_CLASS; + newGridList.insert(std::make_pair(grid, characteristicVecOfGrid)); + gcIter++; + } + + bool changesMade; + // While changes can be made... + do + { + changesMade = false; + HashMap gridListAdjusted = adjustNewLabels(newGridList); + if(!gridListAdjusted.empty()) + { + SESAME_INFO("grid list is adjusted for sparse!"); + mergeGridList(newGridList,gridListAdjusted); + changesMade = true; + } + }while(changesMade); + + // Update the cluster list with the newly formed clusters + gridCluster.grids.clear(); + this->clusterList.at(gridCluster.clusterLabel)= gridCluster; + for (GridCluster & cluster : newClusterList) + this->clusterList.push_back(cluster); + return newGridList; +} + + +SESAME::HashMap SESAME::DStream::adjustNewLabels(SESAME::HashMap newGridList) +{ + HashMap gridListAdjusted; + // a. For each cluster c + for (GridCluster &gridCluster : newClusterList) + { + for (auto &gridIter : gridCluster.grids) + { + DensityGrid grid = gridIter.first; + bool inside = gridIter.second; + + // b. for each OUTSIDE grid, dg, of c + if (!inside) + { + // c. for each neighbouring grid, neighbourGrid, of dg + + for(DensityGrid & neighbourGrid: grid.getNeighbours()) + { + if(newGridList.find(neighbourGrid)!= newGridList.end()) + { + CharacteristicVector characteristicVec1 = newGridList.find(grid)->second; + CharacteristicVector characteristicVec2 = newGridList.find(neighbourGrid)->second; + int class1 = characteristicVec1.label; + int class2 = characteristicVec2.label; + + // ...and if neighbourGrid isn't already in the same cluster as dg... + if (class1 != class2) + { + GridCluster cluster1 = newClusterList.at(class1); + // If dgprime is in cluster c', merge c and c' into the larger of the two + if (class2 != NO_CLASS) + { + GridCluster cluster2 = newClusterList.at(class2); + //System.out.println("C is "+class1+" and C' is "+class2+"."); + if (cluster1.grids.size()< cluster2.grids.size()) + mergeGridList(gridListAdjusted,mergeNewClusters(newGridList, class1, class2)); + else + mergeGridList(gridListAdjusted,mergeNewClusters(newGridList, class2, class1)); + + return gridListAdjusted; + } + // If neighbourGrid is transitional and outside of cluster, assign it to cluster + else if (characteristicVec2.isTransitional(dm, dl)) + { + characteristicVec2.label = class1; + cluster1.addGrid(neighbourGrid); + this->newClusterList.at(class1) = cluster1; + gridListAdjusted.insert(std::make_pair(neighbourGrid, characteristicVec2)); + return gridListAdjusted; + } + } + } + } + } + } + } + return gridListAdjusted; +} + + + +/** + * Adjusts the clustering of a dense density grid. Implements lines 8 through 18 from Figure 4 of Chen and Tu 2007. + * + * @param grid the dense density grid being adjusted + * @param characteristicVec the characteristic vector of dg + * @param gridClass the cluster to which dg belonged + * + * @return a HashMapcontaining density grids for update after this iteration + */ +SESAME::HashMap SESAME::DStream::adjustForDenseGrid(DensityGrid grid, + CharacteristicVector characteristicVec, int gridClass) +{ + // Among all neighbours of dg, find the grid h whose cluster ch has the largest size + GridCluster gridCluster; // The cluster, ch, of h + DensityGrid gridChosen(grid); // The chosen grid h, whose cluster ch has the largest size + double ChosenGridSize = -1.0; // The size of gridCluster, the largest cluster + int hClass = NO_CLASS; // The class label of h + int hChosenClass = NO_CLASS; // The class label of ch + + HashMap newGridList; + //SESAME_INFO("adjust For Dense Grid "<gridList.find(neighbourGrid)!=gridList.end()) + { + hClass = this->gridList.find(neighbourGrid)->second.label; + if (hClass != NO_CLASS) + { + gridCluster = this->clusterList.at(hClass); + + if (gridCluster.grids.size() > ChosenGridSize) + { + ChosenGridSize = gridCluster.grids.size(); + hChosenClass = hClass; + gridChosen = DensityGrid(neighbourGrid); + } + } + } + } + + if (hChosenClass != NO_CLASS && hChosenClass != gridClass) + { + gridCluster = this->clusterList.at(hChosenClass); + + // If h is a dense grid + if (this->gridList.find(gridChosen)->second.attribute == DENSE) + { + // SESAME_INFO("h is dense."); + // If dg is labelled as NO_CLASS + if(gridClass == NO_CLASS) + { + // SESAME_INFO("g was labelled NO_CLASS"); + characteristicVec.label = hChosenClass; + newGridList.insert(std::make_pair(grid, characteristicVec)); + gridCluster.addGrid(grid); + this->clusterList.at(hChosenClass) = gridCluster; + + } + // Else if dg belongs to cluster c and h belongs to c' + else + { + // SESAME_INFO("g was labelled "<clusterList.at(gridClass).grids.size(); + + if (gSize <= ChosenGridSize) + mergeClusters(gridClass, hChosenClass); + else + mergeClusters(hChosenClass, gridClass); + } + } + + // Else if h is a transitional grid + else if (this->gridList.at(gridChosen).attribute == TRANSITIONAL) + { + // SESAME_INFO("h is transitional."); + // If dg is labelled as no class and if h is an outside grid if dg is added to ch + if (gridClass == NO_CLASS && !gridCluster.isInside(gridChosen, grid)) + { + characteristicVec.label = hChosenClass; + newGridList.insert(std::make_pair(grid, characteristicVec)); + gridCluster.addGrid(grid); + this->clusterList.at(hChosenClass) = gridCluster; + // SESAME_INFO(" dg is added to cluster "<= |ch| + else if (gridClass != NO_CLASS) + { + GridCluster c = this->clusterList.at(gridClass); + double gSize = c.grids.size(); + + if (gSize >= ChosenGridSize) + { + // Move h from cluster ch to cluster c + gridCluster.removeGrid(gridChosen); + c.addGrid(gridChosen); + CharacteristicVector cvhChosen = this->gridList.find(gridChosen)->second; + cvhChosen.label = gridClass; + newGridList.insert(std::make_pair(gridChosen, cvhChosen)); + // SESAME_INFO("dgClass is "<clusterList.at(hChosenClass) = gridCluster; + this->clusterList.at(gridClass) = c; + } + } + } + } + // If dgClass is dense and not in a cluster, and none if its neighbours are in a cluster, + // put it in its own new cluster and search the neighbourhood for transitional or dense + // grids to add + else if (gridClass == NO_CLASS) + { + int newClass = (int) this->clusterList.size(); + GridCluster c = GridCluster(newClass); + c.addGrid(grid); + //System.out.println("Added "+dg.toString()+" to cluster "+newClass+"."); + this->clusterList.push_back(c); + characteristicVec.label = newClass; + if(newGridList.find(grid)!=newGridList.end()) + newGridList.find(grid)->second= characteristicVec; + else + newGridList.insert(std::make_pair(grid, characteristicVec)); + // Iterate through the neighbourhood until no more transitional neighbours can be added + // (dense neighbours will add themselves as part of their adjust process) + for (DensityGrid &dghprime: grid.getNeighbours()) + { + if (this->gridList.find(dghprime)!=this->gridList.end() && c.grids.find(dghprime)!=c.grids.end() ) + { + CharacteristicVector cvhprime = this->gridList.find(dghprime)->second; + if(cvhprime.attribute == TRANSITIONAL) + { + c.addGrid(dghprime); + cvhprime.label = newClass ; + newGridList.insert(std::make_pair(dghprime, cvhprime)); + } + } + } + this->clusterList.at(newClass) = c; + } + + return newGridList; +} + + + +/** + * Adjusts the clustering of a transitional density grid. Implements lines 20 and 21 from Figure 4 of Chen and Tu 2007. + * + * @param dg the dense density grid being adjusted + * @param cv the characteristic vector of dg + * @param dgClass the cluster to which dg belonged + * + * @return a HashMap containing density grids for update after this iteration + */ +SESAME::HashMap SESAME::DStream::adjustForTransitionalGrid(DensityGrid grid, CharacteristicVector characteristicVec, + int gridClass) +{ + // Among all neighbours of dg, find the grid h whose cluster ch has the largest size + // and satisfies that dg would be an outside grid if added to it + GridCluster gridCluster; // The cluster, ch, of h + double hChosenSize = 0.0; // The size of ch, the largest cluster + DensityGrid neighbourGrid; // The neighbour of dg being considered + int hClass = NO_CLASS; // The class label of h + int hChosenClass = NO_CLASS; // The class label of ch + HashMap newGridList; + //SESAME_INFO("adjust For Transitional Grid "<gridList.find(neighbourGrid)!= gridList.end()) + { + hClass =this->gridList.find(neighbourGrid)->second.label;; + if (hClass != NO_CLASS) + { + gridCluster = this->clusterList.at(hClass); + + if ((gridCluster.grids.size()> hChosenSize) && !gridCluster.isInside(grid, grid)) + { + hChosenSize = gridCluster.grids.size(); + hChosenClass = hClass; + } + } + } + } + + //System.out.println(" Chosen neighbour is from cluster "+hChosenClass+", dgClass is "+dgClass+"."); + + if (hChosenClass != NO_CLASS && hChosenClass != gridClass) + { + gridCluster = this->clusterList.at(hChosenClass); + gridCluster.addGrid(grid); + this->clusterList.at(hChosenClass) = gridCluster; + + if(gridClass != NO_CLASS) + { + GridCluster c = this->clusterList.at(gridClass); + c.removeGrid(grid); + this->clusterList.at(gridClass) = c; + } + + characteristicVec.label = hChosenClass ; + newGridList.insert(std::make_pair(grid, characteristicVec)); + } + + return newGridList; +} + + + + +SESAME::HashMap SESAME::DStream::mergeNewClusters(SESAME::HashMap newGridList, int smallCluster, int bigCluster) +{ + //System.out.println("Merge new clusters "+smallCluster+" and "+bigCluster+"."); + // Iterate through the density grids in grid_list to find those which are in highClass + for (HashMap::iterator gridIter =newGridList.begin(); gridIter != gridList.end(); gridIter++) + { + DensityGrid grid = gridIter->first; + CharacteristicVector characteristicVec = gridIter->second; + + // Assign density grids in small Cluster to bigCluster + if(characteristicVec.label == smallCluster) + { + characteristicVec.label = bigCluster; + newGridList.insert(std::make_pair(grid, characteristicVec)); + } + } + SESAME_INFO("Density grids assigned to cluster "<newClusterList.at(bigCluster); + bGC.absorbCluster(this->newClusterList.at(smallCluster)); + this->newClusterList.at(bigCluster) = bGC; + this->newClusterList.erase(this->newClusterList.begin()+smallCluster); + //System.out.println("Cluster "+smallClus+" removed from list."); + newGridList = cleanNewClusters(newGridList); + + return newGridList; +} + +/** + * Determines whether a sparse density grid is sporadic using rules S1 and S2 of Chen and Tu 2007 + * + * @param characteristicVec - the CharacteristicVector of the density grid being assessed for sporadicity + */ +bool SESAME::DStream::checkIfSporadic(CharacteristicVector characteristicVec) +{ + // Check S1 + if(characteristicVec.getCurrGridDensity(pointArrivingTime,dStreamParams.lambda) < + densityThresholdFunction(characteristicVec.densityUpdateTime, dStreamParams.cl, dStreamParams.lambda, this->NGrids)) + { + // Check S2 + if(characteristicVec.removeTime == 0 || + (pointArrivingTime- ((1 + dStreamParams.beta)*characteristicVec.removeTime))/CLOCKS_PER_SEC >=0) + return true; + } + + return false; +} + + +/** + * Implements the function pi given in Definition 4.1 of Chen and Tu 2007 + * + * @param tg - the update time in the density grid's characteristic vector + * @param cl - user defined parameter which controls the threshold for sparse grids + * @param lambda - see lambda definition + * @param NGrids - the number of density grids, + */ +double SESAME::DStream::densityThresholdFunction(clock_t tg, double cl, double lambda, int NGrids) +{ + return (cl * (1.0 - dampedWindow->decayFunction(clock()+CLOCKS_PER_SEC,tg)))/(NGrids * (1.0 - lambda)); +} + +/** + * Reassign all grids belonging in the small cluster to the big cluster + * Merge the GridCluster objects representing each cluster + * + * @param smallCluster - the index of the smaller cluster + * @param bigCluster - the index of the bigger cluster + */ +void SESAME::DStream::mergeClusters(int smallCluster, int bigCluster){ + SESAME_INFO("Merge clusters "<first; + CharacteristicVector characteristicVec = gridIter->second; + + // Assign density grids in smallCluster to bigCluster + if(characteristicVec.label == smallCluster) + { + characteristicVec.label = bigCluster; + gridIter->second = characteristicVec; + } + } + SESAME_INFO("Density grids assigned to cluster "<clusterList.at(bigCluster); + bigGridCluster.absorbCluster(this->clusterList.at(smallCluster)); + this->clusterList.at(bigCluster)= bigGridCluster; + this->clusterList.erase(clusterList.begin()+smallCluster); + SESAME_INFO("Cluster "<second= gridIter.second; + + else + thisGridList.insert(std::make_pair(gridIter.first, gridIter.second)); + } +} + +SESAME::HashMap SESAME::DStream::cleanNewClusters(SESAME::HashMap newGridList) +{ + std::vector toRemove; + // Check to see if there are any empty clusters + for(auto & cluster : this->newClusterList) + { + + if(cluster.grids.empty()) + toRemove.push_back(cluster); + } + + // Remove empty clusters + if (!toRemove.empty()) + { + for(auto & RemoveCluster : toRemove) + { + auto removeCIter = std::find(newClusterList.begin(), newClusterList.end(), RemoveCluster); + if( std::find(newClusterList.begin(), newClusterList.end(), RemoveCluster) != newClusterList.end()) + this->newClusterList.erase(removeCIter); + } + } + for(auto & cluster : this->newClusterList) + { + auto clusterIter = std::find(newClusterList.begin(), newClusterList.end(),cluster ); + int index = (int) std::distance(newClusterList.begin(), clusterIter); + cluster.clusterLabel = index; + for( auto & gridOfCluster :cluster.grids) + { + DensityGrid grid = gridOfCluster.first; + CharacteristicVector characteristicVec = newGridList.find(grid)->second; + characteristicVec.label = index; + newGridList.insert(std::make_pair(grid, characteristicVec)); + } + } + SESAME_INFO("Clean finish!"); + return newGridList; +} + + +//TODO stop right here and confused about get() + +/** + * Iterates through cluster_list to ensure that all empty clusters have been removed and + * that all cluster IDs match the cluster's index in cluster_list. + */ + +void SESAME::DStream::cleanClusters() +{ + //SESAME_INFO("Clean Clusters"); + + std::vector toRemove; + + // Check to see if there are any empty clusters + for(auto & cluster : this->clusterList) + { + if(cluster.grids.empty()) + toRemove.push_back(cluster); + } + // Remove empty clusters + if (!toRemove.empty()) + { + for(auto & RemoveCluster : toRemove) + { + auto removeCIter = std::find(newClusterList.begin(), newClusterList.end(), RemoveCluster); + if( std::find(newClusterList.begin(), newClusterList.end(), RemoveCluster) != newClusterList.end()) + this->clusterList.erase(removeCIter); + } + } + // Adjust remaining clusters as necessary + for(auto & cluster : this->clusterList) + { + auto clusterIter = std::find(clusterList.begin(), clusterList.end(),cluster ); + int index = (int) std::distance(clusterList.begin(), clusterIter); + cluster.clusterLabel = index; + this->clusterList.at(index) = cluster ; + for( auto & gridOfCluster :cluster.grids) + { + DensityGrid grid = gridOfCluster.first; + CharacteristicVector characteristicVec = gridList.find(grid)->second; + if(characteristicVec.label == -1) + { + SESAME_INFO("Warning, cv is null for this grid from cluster "<second = characteristicVec; + else + gridList.insert(std::make_pair(grid, characteristicVec)); + } + } +} + + + +/** + * Implements the procedure described in section 4.2 of Chen and Tu 2007 + * For each grid g in grid_list + a. If g is sporadic + i. If currTime - tg > gap, delete g from grid_list + ii. Else if (S1 && S2), mark as sporadic + iii. Else, mark as normal + b. Else + i. If (S1 && S2), mark as sporadic + */ +void SESAME::DStream::removeSporadic() { + SESAME_INFO("REMOVE SPORADIC CALLED"); + // For each grid g in grid_list + + HashMap newGridList; + std::vector removeGridList ; + for( auto & gridIter : this->gridList) + { + DensityGrid grid = gridIter.first; + CharacteristicVector characteristicVec = gridIter.second ; + // If g is sporadic + if (characteristicVec.isSporadic) + { + // If currTime - tg > gap, delete g from grid_list + if ((clock() - characteristicVec.updateTime)/CLOCKS_PER_SEC >= gap) + { + int gridClass = characteristicVec.label; + + if (gridClass != -1) + this->clusterList.at(gridClass).removeGrid(grid); + removeGridList.push_back(grid); + } + // Else if (S1 && S2), mark as sporadic - Else mark as normal + else + { + characteristicVec.isSporadic = checkIfSporadic(characteristicVec); + newGridList.insert(std::make_pair(grid, characteristicVec)); + } + + } + // Else if (S1 && S2), mark as sporadic + else + { + characteristicVec.isSporadic = checkIfSporadic(characteristicVec); + newGridList.insert(std::make_pair(grid, characteristicVec)); + } + } + mergeGridList(gridList, newGridList); + + SESAME_INFO(" - Removed "<deletedGrids.insert(std::make_pair(sporadicGrid, clock())); + this->gridList.erase(sporadicGrid); + } +} + + + + + + diff --git a/src/Algorithm/DataStructure/CMakeLists.txt b/src/Algorithm/DataStructure/CMakeLists.txt index 4b876647..80c982b8 100644 --- a/src/Algorithm/DataStructure/CMakeLists.txt +++ b/src/Algorithm/DataStructure/CMakeLists.txt @@ -12,4 +12,7 @@ add_source_sesame( Cache.cpp OutlierResevoir.cpp FeatureVector.cpp + CharacteristicsVector.cpp + DensityGrid.cpp + GridCluster.cpp ) \ No newline at end of file diff --git a/src/Algorithm/DataStructure/CharacteristicsVector.cpp b/src/Algorithm/DataStructure/CharacteristicsVector.cpp new file mode 100644 index 00000000..b5fd8923 --- /dev/null +++ b/src/Algorithm/DataStructure/CharacteristicsVector.cpp @@ -0,0 +1,89 @@ +// +// Created by 1124a on 2021/10/27. +// + +#include +SESAME::CharacteristicVector::CharacteristicVector(){ + +} +SESAME::CharacteristicVector::CharacteristicVector(clock_t updateTime, clock_t removeTime, double Density, + int label, bool status, double dl, double dm) +{ + this->updateTime=updateTime; + this->removeTime=removeTime; + this->gridDensity=Density; + this->densityUpdateTime=updateTime; + this->label=label; + this->isSporadic=status; + if (this->isSparse(dl)) + this->attribute = SPARSE; + else if (this->isDense(dm)) + this->attribute = DENSE; + else + this->attribute = TRANSITIONAL; + this->attChange=false; +} + + + +bool SESAME::CharacteristicVector::isSparse(double dl) +{ + if(this->gridDensity<=dl) + return true; + else + return false; +} + +bool SESAME::CharacteristicVector::isDense(double dm){ + if(this->gridDensity>=dm) + return true; + else + return false; +} +bool SESAME::CharacteristicVector::isTransitional(double dm, double dl){ + if(this->gridDensity>=dl && this->gridDensity<=dm) + return true; + else + return false; +} +double SESAME::CharacteristicVector::getCurrGridDensity(clock_t NowTime, double lambda) +{ + return pow(lambda, (double)(NowTime-this->updateTime)/CLOCKS_PER_SEC) * this->gridDensity; +} +void SESAME::CharacteristicVector::densityWithNew(clock_t NowTime, double decayFactor) +{ + // Update the density grid's density + this->gridDensity = getCurrGridDensity(NowTime,decayFactor)+1.0; + //System.out.println(densityOfG); + this->densityUpdateTime=NowTime; +} +void SESAME::CharacteristicVector::UpdateAllDensity(clock_t NowTime, double decayFactor, double dl, double dm) +{ + + // record the last attribute + int lastAtt = this->attribute; + // Update the density grid's density + this->gridDensity = getCurrGridDensity(NowTime,decayFactor); + this->densityUpdateTime=NowTime; + + // Evaluate whether the density grid is now SPARSE, DENSE or TRANSITIONAL + if (this->isSparse(dl)) + this->attribute = SPARSE; + else if (this->isDense(dm)) + this->attribute = DENSE; + else + this->attribute = TRANSITIONAL; + // Evaluate whether the density grid attribute has changed and set the attChange flag accordingly + if (this->attribute == lastAtt) + this->attChange = false; + else + this->attChange = true; +} +void SESAME::CharacteristicVector::ChangeAttribute(double dl, double dm){ + if (this->isSparse(dl)) + this->attribute = SPARSE; + else if (this->isDense(dm)) + this->attribute = DENSE; + else + this->attribute = TRANSITIONAL; +} \ No newline at end of file diff --git a/src/Algorithm/DataStructure/DensityGrid.cpp b/src/Algorithm/DataStructure/DensityGrid.cpp new file mode 100644 index 00000000..cb8b2b6b --- /dev/null +++ b/src/Algorithm/DataStructure/DensityGrid.cpp @@ -0,0 +1,88 @@ +// +// Created by 1124a on 2021/10/27. +// +#include +#include + +SESAME::DensityGrid::DensityGrid(){ + +} +SESAME::DensityGrid::DensityGrid(std::vector coordin) +{ + this->dimensions = coordin.size(); + this->coordinates= std::vector (this->dimensions,0) ; + for (int i = 0 ; i < this->dimensions ; i++) + this->coordinates[i] = coordin[i]; + this->isVisited = false; +} + +SESAME::DensityGrid::DensityGrid(DensityGrid const &grid) +{ + std::vector coord = grid.coordinates; + this->dimensions = grid.dimensions; + this->coordinates= std::vector (this->dimensions,0) ; + for (int i = 0 ; i < this->dimensions ; i++) + this->coordinates[i] = coord[i]; + this->isVisited = false; +} + +/** + * Generates a vector of neighbours for this density grid by varying each coordinate + * by one in either direction. Does not test whether the generated neighbours are valid as + * DensityGrid is not aware of the number of partitions in each dimension. + * + * @return a vector of neighbours for this density grid + */ + +std::vector SESAME::DensityGrid::getNeighbours() +{ +// SESAME_INFO("Obtain neighbours"); + std::vector neighbours; + std::vector hCoord = this->coordinates ; + for (int i = 0 ; i < this->dimensions ; i++) + { + hCoord[i] = hCoord[i]-1; + DensityGrid grid(hCoord); + neighbours.push_back(grid); + + hCoord[i] = hCoord[i]+2; + DensityGrid grid2(hCoord); + neighbours.push_back(grid2); + + hCoord[i] = hCoord[i]-1; + } + + return neighbours; +} + + +/** + * Provides the probability of the argument instance belonging to the density grid in question. + * + * @return 1.0 if the instance equals the density grid's coordinates; 0.0 otherwise. + */ + + +double SESAME::DensityGrid::getInclusionProbability(Point point) { + for (int i = 0 ; i < this->dimensions ; i++) + { + if ((int) point.getFeatureItem(i) != this->coordinates[i]) + return 0.0; + } + return 1.0; +} + +bool SESAME::DensityGrid::operator==( DensityGrid& gridOther)const { + bool equal=true; + if (this == &gridOther) { + equal=true; + } + if(this->dimensions != gridOther.dimensions) + equal = false; + for(int i = 0 ; i < this->dimensions ; i++) + { + if(this->coordinates[i] != gridOther.coordinates[i]) + equal= false; + } + return equal; +} \ No newline at end of file diff --git a/src/Algorithm/DataStructure/GridCluster.cpp b/src/Algorithm/DataStructure/GridCluster.cpp new file mode 100644 index 00000000..7469a566 --- /dev/null +++ b/src/Algorithm/DataStructure/GridCluster.cpp @@ -0,0 +1,239 @@ +// +// Created by 1124a on 2021/10/27. +// + +#include +#include + +SESAME::GridCluster::GridCluster() +{ + +} +SESAME::GridCluster::GridCluster( int label) +{ + this->clusterLabel = label; +} +//TODO: if Using this function, be careful when grids are not NULL +SESAME::GridCluster::GridCluster(HashGrids hashMap, int label) +{ + HashGrids::iterator iterW; + for (iterW = hashMap.begin(); iterW != hashMap.end(); iterW++) + { + DensityGrid grid = iterW->first; + bool inside = iterW->second; + this->grids.insert(std::make_pair(grid, inside)); + } + this->clusterLabel = label; +} + +/** + * @param grid the density grid to add to the cluster + */ + + +void SESAME::GridCluster::addGrid(DensityGrid grid) +{ + bool inside = isInside(grid); + if(this->grids.find(grid)!=this->grids.end()) + this->grids.find(grid)->second=inside; + else + this->grids.insert(std::make_pair(grid,inside)); + HashGrids::iterator iterW; + //Iterate on grids and judge whether they are inside grids or not + for (iterW = this->grids.begin(); iterW != this->grids.end(); iterW++) + { + bool inside2U = iterW->second; + if(!inside2U) + { + DensityGrid dg2U = iterW->first; + iterW->second=isInside(dg2U); + } + } +} + +/** + * @param dg the density grid to remove from the cluster + */ +void SESAME::GridCluster::removeGrid(DensityGrid grid) +{ + this->grids.erase(grid); +} + + +/** + * @param gridClus the GridCluster to be absorbed into this cluster + */ +void SESAME::GridCluster::absorbCluster(GridCluster gridCluster) +{ + bool inside; + SESAME::HashGrids newCluster; + SESAME_INFO("Absorb cluster "<< gridCluster.clusterLabel <<" into cluster "<clusterLabel<<"."); + + // Add each density grid from gridCluster into this->grids + auto grid=gridCluster.grids.begin(); + while ( grid != gridCluster.grids.end()) + { + //TODO whether they have same grids? + this->grids.insert(std::make_pair(grid->first, false)); + grid++; + } + SESAME_INFO("...density grids added"); + //Determine which density grids in this.grids are 'inside' and which are 'outside' + auto thisGrid=this->grids.begin(); + while( thisGrid != this->grids.end()) + { + inside = isInside(thisGrid->first); + if(newCluster.find(thisGrid->first)!= newCluster.end()) + { + newCluster.find(thisGrid->first)->second=inside; + } + else + { + newCluster.insert(std::make_pair(thisGrid->first, inside)); + } + thisGrid++; + } + this->grids = newCluster; + SESAME_INFO("...inside/outside determined"); +} + + + +/** + * Inside Grids are defined in Definition 3.5 of Chen and Tu 2007 as: + * Consider a grid group G and a grid g ∈ G, suppose g =(j1, ··· ,jd), if g has + * neighboring grids in every dimension i =1, ·· · ,d, then g is an inside grid + * in G.Otherwise g is an outside grid in G. + * + * @param grid the density grid to label as being inside or out + * @return TRUE if g is an inside grid, FALSE otherwise + */ +bool SESAME::GridCluster::isInside(DensityGrid grid) +{ + std::vector neighbour= grid.getNeighbours(); + for(auto gridNeighbourhood : neighbour) + { + if(this->grids.find(gridNeighbourhood)==this->grids.end()) + { + return false; + } + } + return true; +} + + +/** + * Inside Grids are defined in Definition 3.5 of Chen and Tu 2007 as: + * Consider a grid group G and a grid g ∈ G, suppose g =(j1, ··· ,jd), if g has + * neighboring grids in every dimension i =1, ·· · ,d, then g is an inside grid + * in G. Otherwise g is an outside grid in G. + * + * @param grid the density grid being labelled as inside or outside + * @param other the density grid being proposed for addition + * @return TRUE if g would be an inside grid, FALSE otherwise + */ +bool SESAME::GridCluster::isInside(DensityGrid grid, DensityGrid other) +{ + std::vector neighbour= grid.getNeighbours(); + for(auto gridNeighbourhood : neighbour) + { + if(this->grids.find(gridNeighbourhood)!=this->grids.end()&&gridNeighbourhood == other) + { + return false; + } + } + return true; +} +/** + * Tests a grid cluster for connectedness according to Definition 3.4, Grid Group, from + * Chen and Tu 2007. + * + * Selects one density grid in the grid cluster as a starting point and iterates repeatedly + * through its neighbours until no more density grids in the grid cluster can be visited. + * + * @return TRUE if the cluster represent one single grid group; FALSE otherwise. + */ + +bool SESAME::GridCluster::isConnected() +{ + //TODO A little confused about here + + if (!this->grids.empty()) + { + DensityGrid grid = this->grids.begin()->first; + if(this->visited.find(grid)!=this->visited.end()) + this->visited.find(grid)->second=this->grids.begin()->second; + else + this->visited.insert(std::make_pair(grid,this->grids.begin()->second)); + bool changesMade; + + do{ + changesMade = false; + auto visIter= this->visited.begin(); + HashGrids toAdd; + + while(visIter!= this->visited.end() && toAdd.empty()) + { + DensityGrid dg2V = visIter->first; + std::vector neighbour= dg2V.getNeighbours(); + for(auto dg2VNeighbourhood : neighbour) + { + if(this->grids.find(dg2VNeighbourhood)!=this->grids.end() + && this->visited.find(dg2VNeighbourhood)==this->visited.end()) + toAdd.insert(std::make_pair(dg2VNeighbourhood, this->grids.find(dg2VNeighbourhood)->second)); + } + visIter++; + } + + if(!toAdd.empty()) + { + HashGrids::iterator gridToAdd; + for (gridToAdd = toAdd.begin(); gridToAdd != toAdd.end(); gridToAdd++) + { + if(this->visited.find(gridToAdd->first)!=this->visited.end()) + this->visited.find(gridToAdd->first)->second=gridToAdd->second; + else + this->visited.insert(std::make_pair(gridToAdd->first,gridToAdd->second)); + } + changesMade = true; + } + }while(changesMade); + } + + if (this->visited.size() == this->grids.size()) + { + //SESAME_INFO("The cluster is still connected. "<visited.size()+" of "<grids.size()<<" reached."); + return true; + } + else + { + //SESAME_INFO("The cluster is no longer connected. "<grids.begin(); iterW != this->grids.end(); iterW++) + { + DensityGrid grid = iterW->first; + if(grid.getInclusionProbability(point) == 1.0) + return 1.0; + } + return 0.0; +} + +bool SESAME::GridCluster::operator==(GridCluster& other)const { + bool equal = false; + if( clusterLabel == other.clusterLabel && grids.size()==other.grids.size() + && visited.size()==other.visited.size()) + equal = true; + return equal; +} \ No newline at end of file diff --git a/src/Algorithm/OfflineClustering/ConnectedRegions.cpp b/src/Algorithm/OfflineClustering/ConnectedRegions.cpp index dbfbbff2..9a7a5796 100644 --- a/src/Algorithm/OfflineClustering/ConnectedRegions.cpp +++ b/src/Algorithm/OfflineClustering/ConnectedRegions.cpp @@ -119,7 +119,8 @@ std::vector SESAME::ConnectedRegions::ResultsToDataSink(){ PointPtr point = DataStructureFactory::createPoint(iter, 0, finalClusters.at(iter).front()->dimension, 0); //This is just for testing, need to delete std::vector centroid(finalClusters.at(iter).front()->dimension,0); - for(auto j=0; j!=finalClusters.at(iter).size();j++) + //TODO maybe wrong ;so dizzy + for(auto j=0; jgetWeight()+finalClusters.at(iter).at(j)->weight; point->setWeight(currentWeight); diff --git a/src/Algorithm/WindowModel/LandmarkWindow.cpp b/src/Algorithm/WindowModel/LandmarkWindow.cpp index fd220aa2..2f5080b0 100644 --- a/src/Algorithm/WindowModel/LandmarkWindow.cpp +++ b/src/Algorithm/WindowModel/LandmarkWindow.cpp @@ -212,12 +212,12 @@ void SESAME::LandmarkWindow::pyramidalWindowProcess(clock_t startTime,const SESA */ void SESAME::LandmarkWindow::storeSnapshot(unsigned int currentOrder,const MicroClusters & microClusters, int elapsedTime) { - SESAME_INFO("taking snapshot"); +// SESAME_INFO("taking snapshot"); unsigned int size=orderSnapShots[currentOrder].size(); SnapshotPtr snapshot; snapshot=DataStructureFactory::createSnapshot(const_cast &>(microClusters),elapsedTime); - SESAME_INFO("The current order size is "<pyramidalWindow.timeInterval+1) { orderSnapShots[currentOrder].erase(orderSnapShots[currentOrder].begin()); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4967eca2..edf9064c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -6,6 +6,7 @@ add_executable(Google_Tests_run SystemTest/EDMStreamTest.cpp SystemTest/DenStreamTest.cpp SystemTest/DBStreamTest.cpp + SystemTest/DStreamTest.cpp ) # linking Google_Tests_run with sesame_lib which will be tested diff --git a/test/SystemTest/CluStreamTest.cpp b/test/SystemTest/CluStreamTest.cpp index 89476f01..6bf9dbef 100644 --- a/test/SystemTest/CluStreamTest.cpp +++ b/test/SystemTest/CluStreamTest.cpp @@ -18,17 +18,15 @@ TEST(SystemTest, CluStreamTest) { //Parse parameters. param_t cmd_params; cmd_params.pointNumber = 15120; - cmd_params.seed = 10; - cmd_params.clusterNumber = 10; cmd_params.dimension = 54; - cmd_params.coresetSize = 100; - cmd_params.lastArrivingNum = 60; - cmd_params.timeWindow = 6; - cmd_params.timeInterval = 4; - cmd_params.onlineClusterNumber = 15; - cmd_params.radiusFactor = 70; - cmd_params.initBuffer = 500; - cmd_params.offlineTimeWindow = 2; + cmd_params.clusterNumber = 12; + cmd_params.lastArrivingNum =25; + cmd_params.timeWindow = 3; + cmd_params.timeInterval =6; + cmd_params.onlineClusterNumber =60; + cmd_params.radiusFactor =15; + cmd_params.initBuffer = 1000; + cmd_params.offlineTimeWindow = 0; cmd_params.inputPath = std::filesystem::current_path().generic_string() + "/datasets/CoverType.txt"; cmd_params.outputPath = "results.txt"; cmd_params.algoType = SESAME::CluStreamType; diff --git a/test/SystemTest/DBStreamTest.cpp b/test/SystemTest/DBStreamTest.cpp index 9a4071f5..9c734056 100644 --- a/test/SystemTest/DBStreamTest.cpp +++ b/test/SystemTest/DBStreamTest.cpp @@ -21,11 +21,11 @@ TEST(SystemTest, DBStreamTest) { cmd_params.pointNumber = 15120; cmd_params.dimension = 54; cmd_params.base=2; - cmd_params.lambda= 0.0001; - cmd_params.radius= 500; - cmd_params.cleanUpInterval=3; - cmd_params.weightMin=2; - cmd_params.alpha=0.25; + cmd_params.lambda= 0.6; + cmd_params.radius= 2000; + cmd_params.cleanUpInterval=6; + cmd_params.weightMin=3; + cmd_params.alpha=0.3; cmd_params.inputPath = std::filesystem::current_path().generic_string() + "/datasets/CoverType.txt"; cmd_params.outputPath = "results.txt"; cmd_params.algoType = SESAME::DBStreamType; diff --git a/test/SystemTest/DStreamTest.cpp b/test/SystemTest/DStreamTest.cpp new file mode 100644 index 00000000..9b59ab05 --- /dev/null +++ b/test/SystemTest/DStreamTest.cpp @@ -0,0 +1,47 @@ +// +// Created by 1124a on 2021/11/2. +// + + +#include +#include +#include +#include +#include +#include +#include + +TEST(SystemTest, DStreamTest) { + //Setup Logs. + setupLogging("benchmark.log", LOG_DEBUG); + //Parse parameters. + param_t cmd_params; + cmd_params.pointNumber =15120; + cmd_params.dimension = 54; + cmd_params.lambda= 0.998; + cmd_params.beta=0.001; + cmd_params.cm = 3.0; + cmd_params.cl = 1; + cmd_params.gridWidth= 6; + + cmd_params.inputPath = std::filesystem::current_path().generic_string() + "/datasets/CoverType.txt"; + cmd_params.outputPath = "results.txt"; + cmd_params.algoType = SESAME::DStreamType; + + std::vector input; + std::vector results; + + //Create Spout. + SESAME::DataSourcePtr sourcePtr = SESAME::DataSourceFactory::create(); + //Directly load data from file. TODO: configure it to load from external sensors, e.g., HTTP. + BenchmarkUtils::loadData(cmd_params, sourcePtr); + + //Create Sink. + SESAME::DataSinkPtr sinkPtr = SESAME::DataSinkFactory::create(); + + //Create Algorithm. + SESAME::AlgorithmPtr algoPtr = SESAME::AlgorithmFactory::create(cmd_params); + + //Run algorithm producing results. + BenchmarkUtils::runBenchmark(cmd_params, sourcePtr, sinkPtr, algoPtr); +} \ No newline at end of file diff --git a/test/SystemTest/DenStreamTest.cpp b/test/SystemTest/DenStreamTest.cpp index 28d3e448..73126cae 100644 --- a/test/SystemTest/DenStreamTest.cpp +++ b/test/SystemTest/DenStreamTest.cpp @@ -20,14 +20,14 @@ TEST(SystemTest, DenStreamTest) { param_t cmd_params; cmd_params.pointNumber = 15120; cmd_params.dimension = 54; - cmd_params.minPoints=10; - cmd_params.epsilon=350; + cmd_params.minPoints=40; + cmd_params.epsilon=30; cmd_params.base=2; - cmd_params.lambda= 0.25; - cmd_params.mu=10; + cmd_params.lambda= 0.3; + cmd_params.mu=5; cmd_params.beta=0.25; cmd_params.initBuffer = 1000; - cmd_params.inputPath = std::filesystem::current_path().generic_string() + "/datasets/CoverType.txt"; + cmd_params.inputPath = std::filesystem::current_path().generic_string() + "/datasets/encoded_data.txt"; cmd_params.outputPath = "results.txt"; cmd_params.algoType = SESAME::DenStreamType;