-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* ReWrite DStream based on MOA * ReWrite DStream based on MOA * ReWrite DStream based on MOA, left Debug after refactor all algorithms * Evaluation wx (#70) * add purity * add preparations for CMM, still have problem about driver * CMM done, but need to remove hard code * still need to change voteMap function * purity wx (#71) * add purity * add preparations for CMM, still have problem about driver * CMM done, but need to remove hard code * still need to change voteMap function * fix bug of purity * Update README.md * Add Outlier Detection Part (#72) * Update README.md * pull request from DataSource-yzh to main (#74) * Updated DataSource * Removed hard-code in DataSource::load * Modified timer and removed hard-coded queue initialization Co-authored-by: Zhonghao Yang <zhonghao_yang@zhonghaosMBP.dhcp.sutd.edu.sg> Co-authored-by: Zhonghao Yang <zhonghao_yang@mymail.sutd.edu.sg> * Fix timer (#75) * Construct Time measurement * Constructing Time measurement * Re write timer hpp * Construct Time measurement * Constructing Time measurement * Re write timer hpp * Finish timer, need final debug ... X_X * A LITTLE BUG IN OUTLIER DETECTION PART, NEED TO ADJUST ACCUMULATE TIMER * turn accumulate timer into more simple one * turn accumulate timer into more simple one * fix all * Subtract connection based offline refinement. 
which can be used later in refactor part (#76) * modify DStream * FU DStream, no bugs, performance is terrible * FU DStream, no bugs, performance now OK * ReWrite DStream based on MOA, left Debug after refactor all algorithms * FU DStream, no bugs, performance now OK * ReWrite DStream based on MOA, left Debug after refactor all algorithms * delete most of the print * ReWrite DStream based on MOA, left Debug after refactor all algorithms * delete most of the print * ReWrite DStream based on MOA, left Debug after refactor all algorithms * delete most of the print * ReWrite DStream based on MOA * ReWrite DStream based on MOA * ReWrite DStream based on MOA, left Debug after refactor all algorithms * modify DStream * FU DStream, no bugs, performance is terrible * FU DStream, no bugs, performance now OK * ReWrite DStream based on MOA, left Debug after refactor all algorithms * FU DStream, no bugs, performance now OK * recover test cmake Co-authored-by: tuidan <40883104+tuidan@users.noreply.github.com> Co-authored-by: Tony <tonyzhang19900609@gmail.com> Co-authored-by: Zhonghao Yang <62000831+zhonghao-yang@users.noreply.github.com> Co-authored-by: Zhonghao Yang <zhonghao_yang@zhonghaosMBP.dhcp.sutd.edu.sg> Co-authored-by: Zhonghao Yang <zhonghao_yang@mymail.sutd.edu.sg> Former-commit-id: 609681d
- Loading branch information
1 parent
6417bb6
commit ed3b38f
Showing
23 changed files
with
1,929 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
// | ||
// Created by 1124a on 2021/10/27. | ||
// | ||
|
||
#ifndef SESAME_INCLUDE_ALGORITHM_DSTREAM_HPP_ | ||
#define SESAME_INCLUDE_ALGORITHM_DSTREAM_HPP_ | ||
#include <Algorithm/Algorithm.hpp> | ||
#include <Algorithm/DataStructure/DensityGrid.hpp> | ||
#include <Algorithm/DataStructure/CharacteristicVector.hpp> | ||
#include <Algorithm/DataStructure/GridCluster.hpp> | ||
#include <Utils/BenchmarkUtils.hpp> | ||
|
||
|
||
namespace SESAME{ | ||
// Forward declaration; the DStream class itself is defined further down in this header. | ||
class DStream; | ||
// Bundle of user-supplied tuning parameters for D-Stream, extending AlgorithmParameters. | ||
// DStream keeps one instance in its public `dStreamParams` member. | ||
class DStreamParams : public AlgorithmParameters { | ||
public: | ||
// User defined parameter: lambda, the decay factor used in the damped window | ||
double lambda; | ||
/* User defined parameter: Adjusts the window of protection for | ||
* renaming previously deleted grids as being sporadic | ||
* */ | ||
double beta; | ||
double cm; // User defined parameter: Controls the threshold for dense grids (see DStream::dm) | ||
double cl; // User defined parameter: Controls the threshold for sparse grids (see DStream::dl) | ||
int gridWidth; // Width of grid | ||
// NOTE(review): none of the fields above has a default member initializer in this header; | ||
// presumably they are filled in by the DStream constructor from its parameters - confirm. | ||
}; | ||
|
||
// Maps a DensityGrid (hashed with GridKeyHash, compared with EqualGrid) to its CharacteristicVector. | ||
typedef std::unordered_map<DensityGrid, CharacteristicVector,GridKeyHash,EqualGrid> HashMap; | ||
// D-Stream clustering algorithm, declared as a subclass of Algorithm. | ||
// Only declarations live in this header; the member functions are defined elsewhere. | ||
class DStream : public Algorithm | ||
{ | ||
public: | ||
// Tuning parameters (lambda, beta, cm, cl, gridWidth) | ||
DStreamParams dStreamParams; | ||
// Damped window model; type comes from Algorithm/WindowModel/DampedWindow.hpp | ||
DampedWindowPtr dampedWindow; | ||
// Timestamps kept as clock_t. NOTE(review): presumably taken with clock() - confirm in the .cpp | ||
clock_t startTime; | ||
clock_t pointArrivingTime; | ||
clock_t lastAdjustTime; | ||
int gap;// Time gap between calls to the offline component | ||
double dm;// Density threshold for dense grids; controlled by cm | ||
double dl; // Density threshold for sparse grids; controlled by cl | ||
int NGrids;//The number of density grids, with an initial value 0 | ||
|
||
|
||
//TODO Split the grid list from DStream | ||
// Every known density grid together with its characteristic vector | ||
HashMap gridList; | ||
// Grids paired with a clock_t value. NOTE(review): presumably the time each grid was deleted - confirm | ||
std::unordered_map<DensityGrid, clock_t,GridKeyHash,EqualGrid> deletedGrids; | ||
std::vector<GridCluster> clusterList; // A list of all Grid Clusters | ||
std::vector<GridCluster> newClusterList; //A list of grid clusters used when re-clustering an existing cluster. | ||
std::vector<int> minVals; //The minimum value seen for a numerical dimension; used to calculate N | ||
std::vector<int> maxVals; //The maximum value seen for a numerical dimension; used to calculate N | ||
|
||
// Constructed from the parameter set passed by the caller (param_t is declared elsewhere) | ||
DStream(param_t &cmd_params); | ||
~DStream(); | ||
// Overrides of the Algorithm interface. "Initilize" is spelled this way in the overridden | ||
// declaration as well (it is marked override), so the name cannot be corrected here alone. | ||
void Initilize() override; | ||
// Online phase entry point: receives one input point per call | ||
void runOnlineClustering(PointPtr input) override; | ||
// Offline phase entry point: receives the data sink pointer supplied by the caller | ||
void runOfflineClustering(DataSinkPtr sinkPtr) override; | ||
private: | ||
// State flags, all starting as false | ||
bool clusterInitial = false; | ||
bool isInitial = false; | ||
bool recalculateN =false; // flag indicating whether N needs to be recalculated after this instance | ||
// Integer coordinate buffers. NOTE(review): presumably scratch space for mapping a point to a grid - confirm | ||
std::vector<int> tempCoord; | ||
std::vector<int> Coord; | ||
// Helpers related to recalculating N (see recalculateN, minVals, maxVals) | ||
void ifReCalculateN(PointPtr point); | ||
void reCalculateN(); | ||
|
||
// Takes a grid coordinate vector by value. NOTE(review): presumably inserts/updates that grid in gridList - confirm | ||
void GridListUpdate(std::vector<int> coordinate); | ||
|
||
|
||
// Clustering steps; by their names these correspond to the initial clustering and | ||
// adjust clustering procedures of D-Stream (bodies are not visible in this header) | ||
void initialClustering(); | ||
|
||
void adjustClustering(); | ||
|
||
|
||
// Helpers of the adjust step. The adjustFor* functions take the grid and its characteristic | ||
// vector by value and hand back a HashMap. | ||
bool adjustLabels(); | ||
bool inspectChangedGrids(); | ||
HashMap adjustForSparseGrid(DensityGrid grid, CharacteristicVector characteristicVec, int gridClass); | ||
HashMap adjustForDenseGrid(DensityGrid grid, CharacteristicVector characteristicVec, int gridClass); | ||
HashMap adjustForTransitionalGrid(DensityGrid grid, CharacteristicVector characteristicVec, int gridClass); | ||
void removeSporadic(); | ||
|
||
|
||
// Re-clustering and cluster maintenance helpers. Those working on a "new" grid list take | ||
// the HashMap by value and return a HashMap rather than modifying a member in place. | ||
HashMap reCluster (GridCluster gridCluster); | ||
HashMap adjustNewLabels(HashMap newGridList); | ||
void mergeClusters(int smallCluster, int bigCluster); | ||
void cleanClusters(); | ||
HashMap cleanNewClusters(HashMap newGridList); | ||
HashMap mergeNewClusters(HashMap newGridList, int smallCluster, int bigCluster); | ||
// Density threshold helper taking a time tg, cl, the decay factor and the grid count | ||
double densityThresholdFunction(clock_t tg, double cl, double decayFactor, int NGrids); | ||
bool checkIfSporadic(CharacteristicVector characteristicVec); | ||
void updateGridListDensity(); | ||
// NOTE(review): `gridList` is passed BY VALUE and the function returns void, so any entries | ||
// merged into it are not visible to the caller through this parameter - confirm this is intended. | ||
static void mergeGridList(HashMap gridList, const HashMap &otherList); | ||
}; | ||
|
||
} | ||
|
||
#endif //SESAME_INCLUDE_ALGORITHM_DSTREAM_HPP_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
// | ||
// Created by 1124a on 2021/10/27. | ||
// | ||
|
||
#ifndef SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_CHARACTERISTICVECTOR_HPP_ | ||
#define SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_CHARACTERISTICVECTOR_HPP_ | ||
#include <ctime> | ||
#include <cmath> | ||
#include <Algorithm/WindowModel/DampedWindow.hpp> | ||
namespace SESAME{ | ||
// Named integer constants for grid classification. Because the first enumerator is pinned | ||
// to -1, the values are NO_CLASS = -1, SPARSE = 0, TRANSITIONAL = 1, DENSE = 2. | ||
// CharacteristicVector::attribute is documented as taking SPARSE, TRANSITIONAL or DENSE. | ||
enum Status{ | ||
NO_CLASS= -1,SPARSE,TRANSITIONAL ,DENSE | ||
}; | ||
// Per-grid bookkeeping record of D-Stream (update/removal times, density, label, status, | ||
// attribute). DStream stores one per DensityGrid in its HashMap. | ||
class CharacteristicVector { | ||
public: | ||
/** | ||
* t_g: The last time when g is updated | ||
*/ | ||
clock_t updateTime; | ||
|
||
/** | ||
* tm : last time when g is removed from grid_list as a sporadic grid (if ever). | ||
*/ | ||
clock_t removeTime; | ||
|
||
/** | ||
* D: the grid density at the last update | ||
*/ | ||
double gridDensity; | ||
|
||
/** | ||
* label: the cluster label of the grid | ||
*/ | ||
int label; | ||
|
||
/** | ||
* status: status = {SPORADIC, NORMAL}, stored as a bool | ||
* (presumably true means SPORADIC - confirm against the implementation) | ||
*/ | ||
bool isSporadic; | ||
|
||
/** | ||
* attribute: attribute = {SPARSE, TRANSITIONAL, DENSE} (see enum Status) | ||
*/ | ||
int attribute; | ||
|
||
/** | ||
* time stamp at which the grid's density was last updated (including initial and adjust clustering) | ||
*/ | ||
clock_t densityUpdateTime; | ||
|
||
/** | ||
* Flag marking whether there was a change in the attribute field | ||
* the last time the grid density was updated. | ||
*/ | ||
bool attChange; | ||
// Visit marker; starts as false | ||
bool isVisited=false; | ||
|
||
CharacteristicVector(); | ||
// Field-wise constructor. The argument names mirror the members above (Density -> gridDensity, | ||
// status -> isSporadic); dl and dm are the sparse and dense density thresholds. | ||
// NOTE(review): presumably dl/dm are used to derive the initial attribute - confirm in the .cpp | ||
CharacteristicVector(clock_t updateTime, clock_t removeTime, double Density, int label, bool status, double dl, double dm); | ||
// Returns a density for time NowTime given the decay value lambda. | ||
// NOTE(review): whether this also modifies gridDensity cannot be told from the declaration | ||
double getCurrGridDensity(clock_t NowTime, double lambda); | ||
|
||
// Threshold predicates: dl is the threshold for sparse grids, dm the threshold for dense grids | ||
bool isSparse(double dl); | ||
bool isDense(double dm); | ||
bool isTransitional(double dm, double dl); | ||
/** | ||
* Implements the density update function given in | ||
* eq 5 (Proposition 3.1) of Chen and Tu 2007. | ||
* | ||
* @param NowTime the data stream's current internal time | ||
* @param decayFactor the value of lambda | ||
*/ | ||
void densityWithNew(clock_t NowTime, double decayFactor); | ||
/** | ||
* Implements the update the density of all grids step given at line 2 of | ||
* both Fig 3 and Fig 4 of Chen and Tu 2007. | ||
* | ||
* @param NowTime the data stream's current internal time | ||
* @param decayFactor the value of lambda | ||
* @param dl the threshold for sparse grids | ||
* @param dm the threshold for dense grids | ||
*/ | ||
void UpdateAllDensity(clock_t NowTime, double decayFactor, double dl, double dm); | ||
// Takes the sparse (dl) and dense (dm) thresholds. | ||
// NOTE(review): presumably re-derives `attribute` and sets `attChange` - confirm in the .cpp | ||
void ChangeAttribute(double dl, double dm); | ||
}; | ||
} | ||
#endif //SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_CHARACTERISTICVECTOR_HPP_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
// | ||
// Created by 1124a on 2021/10/27. | ||
// | ||
#ifndef SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_DENSITYGRID_HPP_ | ||
#define SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_DENSITYGRID_HPP_ | ||
#include <cassert> | ||
#include <iostream> | ||
#include <vector> | ||
#include <Algorithm/DataStructure/Point.hpp> | ||
#include <algorithm> | ||
#include <memory> | ||
|
||
|
||
namespace SESAME{ | ||
|
||
// Forward declaration so the shared-pointer alias can be introduced before the class body. | ||
class DensityGrid; | ||
typedef std::shared_ptr<DensityGrid> DensityGridPtr; | ||
|
||
// A density grid identified by its integer coordinates. It is used as the key type of | ||
// unordered containers together with the GridKeyHash and EqualGrid functors declared below. | ||
class DensityGrid{ | ||
public: | ||
/** | ||
* For each dimension, its space Si, i =1, ··· ,d is divided into pi partitions as | ||
* Si = Si,1 U Si,2 U ··· U Si,pi | ||
* A density grid g that is composed of S1,j1 ×S2,j2 ···×Sd,jd , ji =1, ...,pi, | ||
* has coordinates (j1,j2, ··· ,jd). | ||
*/ | ||
std::vector<int> coordinates; | ||
/** | ||
* The value of 'd' for the d-dimensional space S considered by D-Stream. | ||
* GridKeyHash and EqualGrid index coordinates[0 .. dimensions-1], so this must not | ||
* exceed coordinates.size(). | ||
*/ | ||
int dimensions; | ||
|
||
/** | ||
* Flag denoting whether this density grid has been inspected during the adjustClustering() | ||
* step of D-Stream. | ||
*/ | ||
bool isVisited; | ||
/** | ||
* Constructors for a density grid: a default constructor taking no arguments, | ||
* and an overload taking the grid coordinates. | ||
* | ||
* @param coordin the coordinates of the density grid (second overload only) | ||
*/ | ||
DensityGrid(); | ||
DensityGrid(std::vector<int> coordin); | ||
|
||
/** | ||
* A copy constructor for a density grid | ||
* | ||
* @param grid the density grid to copy | ||
*/ | ||
DensityGrid(DensityGrid const &grid); | ||
/** | ||
* Generates a vector of neighbours for this density grid by varying each coordinate | ||
* by one in either direction. Does not test whether the generated neighbours are valid as | ||
* DensityGrid is not aware of the number of partitions in each dimension. | ||
* | ||
* @return a vector of neighbours for this density grid | ||
*/ | ||
std::vector<DensityGrid> getNeighbours(); | ||
|
||
/** | ||
* Provides the probability of the argument instance belonging to the density grid in question. | ||
* | ||
* @param point the instance to test against this grid (passed by value) | ||
* @return 1.0 if the instance equals the density grid's coordinates; 0.0 otherwise. | ||
*/ | ||
|
||
double getInclusionProbability(Point point); | ||
|
||
// NOTE(review): the right-hand operand is a NON-const reference, so this operator cannot be | ||
// called with a const or temporary DensityGrid; the unordered containers in this project use | ||
// the EqualGrid functor instead. Changing the signature requires updating the definition too. | ||
bool operator==( DensityGrid& gridOther)const; | ||
|
||
|
||
|
||
|
||
}; | ||
struct GridKeyHash{ | ||
std::size_t operator()(const DensityGrid &densityGrid) const | ||
{ | ||
//int[] primes = {31, 37, 41, 43, 47, 53, 59}; | ||
int hc = 1; | ||
for (int i = 0 ; i < densityGrid.dimensions ; i++) | ||
{ | ||
hc = (hc * 31) + densityGrid.coordinates[i]; | ||
} | ||
|
||
return hc; | ||
} | ||
}; | ||
|
||
struct EqualGrid | ||
{ | ||
bool operator() (const DensityGrid &densityGrid1, const DensityGrid &densityGrid2) const | ||
{ | ||
if(densityGrid1.dimensions != densityGrid2.dimensions) | ||
return false; | ||
for(int i = 0 ; i < densityGrid1.dimensions ; i++) | ||
{ | ||
if(densityGrid1.coordinates[i] != densityGrid2.coordinates[i]) | ||
return false; | ||
} | ||
return true; | ||
} | ||
}; | ||
|
||
} | ||
#endif //SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_DENSITYGRID_HPP_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
// | ||
// Created by 1124a on 2021/10/27. | ||
// | ||
|
||
#ifndef SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_GRIDCLUSTER_HPP_ | ||
#define SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_GRIDCLUSTER_HPP_ | ||
#include <algorithm> | ||
#include <functional> | ||
#include <unordered_map> | ||
#include <Algorithm/DataStructure/DensityGrid.hpp> | ||
namespace SESAME{ | ||
// Forward declaration; the class body follows the HashGrids alias. | ||
class GridCluster; | ||
// Maps a DensityGrid (hashed with GridKeyHash, compared with EqualGrid) to a bool flag. | ||
typedef std::unordered_map<DensityGrid,bool,GridKeyHash,EqualGrid> HashGrids; | ||
// A cluster made of density grids, identified by an integer label. | ||
class GridCluster{ | ||
public: | ||
// Grids belonging to this cluster. NOTE(review): presumably the bool marks inside/outside grids - confirm | ||
HashGrids grids; | ||
// Second grid map. NOTE(review): presumably bookkeeping for the traversal in isConnected() - confirm | ||
HashGrids visited; | ||
int clusterLabel; | ||
// Constructors: from a label, default, and from an existing grid map plus a label | ||
GridCluster(int label); | ||
GridCluster(); | ||
GridCluster(HashGrids hashMap, int label); | ||
/** | ||
* @param grid the density grid to add to the cluster | ||
*/ | ||
void addGrid(DensityGrid grid); | ||
|
||
|
||
/** | ||
* @param grid the density grid to remove from the cluster | ||
*/ | ||
void removeGrid(DensityGrid grid); | ||
|
||
/** | ||
* @param gridCluster the GridCluster to be absorbed into this cluster | ||
*/ | ||
void absorbCluster(GridCluster gridCluster); | ||
/** | ||
* Inside Grids are defined in Definition 3.5 of Chen and Tu 2007 as: | ||
* Consider a grid group G and a grid g ∈ G, suppose g =(j1, ··· ,jd), if g has | ||
* neighboring grids in every dimension i =1, ··· ,d, then g is an inside grid | ||
* in G. Otherwise g is an outside grid in G. | ||
* | ||
* @param grid the density grid to label as being inside or out | ||
* @return TRUE if g is an inside grid, FALSE otherwise | ||
*/ | ||
bool isInside(DensityGrid grid); | ||
|
||
|
||
|
||
/** | ||
* Overload of isInside() that evaluates the same Definition 3.5 of Chen and Tu 2007 | ||
* (g is an inside grid in G if it has neighboring grids in every dimension | ||
* i =1, ··· ,d; otherwise it is an outside grid) while also taking into account | ||
* a grid that is being proposed for addition. | ||
* | ||
* @param grid the density grid being labelled as inside or outside | ||
* @param other the density grid being proposed for addition | ||
* @return TRUE if g would be an inside grid, FALSE otherwise | ||
*/ | ||
bool isInside(DensityGrid grid, DensityGrid other); | ||
|
||
|
||
|
||
/** | ||
* Tests a grid cluster for connectedness according to Definition 3.4, Grid Group, from | ||
* Chen and Tu 2007. | ||
* | ||
* Selects one density grid in the grid cluster as a starting point and iterates repeatedly | ||
* through its neighbours until no more density grids in the grid cluster can be visited. | ||
* | ||
* @return TRUE if the cluster represent one single grid group; FALSE otherwise. | ||
*/ | ||
|
||
bool isConnected(); | ||
|
||
|
||
/** | ||
* Iterates through the DensityGrids in the cluster and calculates the inclusion probability for each. | ||
* | ||
* @param point the instance to test against the grids of this cluster (passed by value) | ||
* @return 1.0 if instance matches any of the density grids; 0.0 otherwise. | ||
*/ | ||
double getInclusionProb(Point point); | ||
// NOTE(review): the right-hand operand is a NON-const reference, so this operator cannot be | ||
// called with a const or temporary GridCluster; changing it requires updating the definition too. | ||
bool operator==( GridCluster& Other)const; | ||
}; | ||
|
||
|
||
} | ||
|
||
|
||
#endif //SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_GRIDCLUSTER_HPP_ |
Oops, something went wrong.