Skip to content

Commit

Permalink
Birch wx (#48)
Browse files Browse the repository at this point in the history
* modify data sink part in StreamKM

* fix bug for birch, complete task #10, modify store functions

Former-commit-id: 511978b
  • Loading branch information
tuidan authored Aug 25, 2021
1 parent 3185121 commit 7be4f80
Show file tree
Hide file tree
Showing 29 changed files with 1,241 additions and 148 deletions.
File renamed without changes.
150 changes: 150 additions & 0 deletions benchmark/datasets/Mock.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
1 5.1 3.5 1.4 0.2 0
2 4.9 3 1.4 0.2 0
3 4.7 3.2 1.3 0.2 0
4 4.6 3.1 1.5 0.2 0
5 5 3.6 1.4 0.2 0
6 5.4 3.9 1.7 0.4 0
7 4.6 3.4 1.4 0.3 0
8 5 3.4 1.5 0.2 0
9 4.4 2.9 1.4 0.2 0
10 4.9 3.1 1.5 0.1 0
11 5.4 3.7 1.5 0.2 0
12 4.8 3.4 1.6 0.2 0
13 4.8 3 1.4 0.1 0
14 4.3 3 1.1 0.1 0
15 5.8 4 1.2 0.2 0
16 5.7 4.4 1.5 0.4 0
17 5.4 3.9 1.3 0.4 0
18 5.1 3.5 1.4 0.3 0
19 5.7 3.8 1.7 0.3 0
20 5.1 3.8 1.5 0.3 0
21 5.4 3.4 1.7 0.2 0
22 5.1 3.7 1.5 0.4 0
23 4.6 3.6 1 0.2 0
24 5.1 3.3 1.7 0.5 0
25 4.8 3.4 1.9 0.2 0
26 5 3 1.6 0.2 0
27 5 3.4 1.6 0.4 0
28 5.2 3.5 1.5 0.2 0
29 5.2 3.4 1.4 0.2 0
30 4.7 3.2 1.6 0.2 0
31 4.8 3.1 1.6 0.2 0
32 5.4 3.4 1.5 0.4 0
33 5.2 4.1 1.5 0.1 0
34 5.5 4.2 1.4 0.2 0
35 4.9 3.1 1.5 0.1 0
36 5 3.2 1.2 0.2 0
37 5.5 3.5 1.3 0.2 0
38 4.9 3.1 1.5 0.1 0
39 4.4 3 1.3 0.2 0
40 5.1 3.4 1.5 0.2 0
41 5 3.5 1.3 0.3 0
42 4.5 2.3 1.3 0.3 0
43 4.4 3.2 1.3 0.2 0
44 5 3.5 1.6 0.6 0
45 5.1 3.8 1.9 0.4 0
46 4.8 3 1.4 0.3 0
47 5.1 3.8 1.6 0.2 0
48 4.6 3.2 1.4 0.2 0
49 5.3 3.7 1.5 0.2 0
50 5 3.3 1.4 0.2 0
51 7 3.2 4.7 1.4 1
52 6.4 3.2 4.5 1.5 1
53 6.9 3.1 4.9 1.5 1
54 5.5 2.3 4 1.3 1
55 6.5 2.8 4.6 1.5 1
56 5.7 2.8 4.5 1.3 1
57 6.3 3.3 4.7 1.6 1
58 4.9 2.4 3.3 1 1
59 6.6 2.9 4.6 1.3 1
60 5.2 2.7 3.9 1.4 1
61 5 2 3.5 1 1
62 5.9 3 4.2 1.5 1
63 6 2.2 4 1 1
64 6.1 2.9 4.7 1.4 1
65 5.6 2.9 3.6 1.3 1
66 6.7 3.1 4.4 1.4 1
67 5.6 3 4.5 1.5 1
68 5.8 2.7 4.1 1 1
69 6.2 2.2 4.5 1.5 1
70 5.6 2.5 3.9 1.1 1
71 5.9 3.2 4.8 1.8 1
72 6.1 2.8 4 1.3 1
73 6.3 2.5 4.9 1.5 1
74 6.1 2.8 4.7 1.2 1
75 6.4 2.9 4.3 1.3 1
76 6.6 3 4.4 1.4 1
77 6.8 2.8 4.8 1.4 1
78 6.7 3 5 1.7 1
79 6 2.9 4.5 1.5 1
80 5.7 2.6 3.5 1 1
81 5.5 2.4 3.8 1.1 1
82 5.5 2.4 3.7 1 1
83 5.8 2.7 3.9 1.2 1
84 6 2.7 5.1 1.6 1
85 5.4 3 4.5 1.5 1
86 6 3.4 4.5 1.6 1
87 6.7 3.1 4.7 1.5 1
88 6.3 2.3 4.4 1.3 1
89 5.6 3 4.1 1.3 1
90 5.5 2.5 4 1.3 1
91 5.5 2.6 4.4 1.2 1
92 6.1 3 4.6 1.4 1
93 5.8 2.6 4 1.2 1
94 5 2.3 3.3 1 1
95 5.6 2.7 4.2 1.3 1
96 5.7 3 4.2 1.2 1
97 5.7 2.9 4.2 1.3 1
98 6.2 2.9 4.3 1.3 1
99 5.1 2.5 3 1.1 1
100 5.7 2.8 4.1 1.3 1
101 6.3 3.3 6 2.5 2
102 5.8 2.7 5.1 1.9 2
103 7.1 3 5.9 2.1 2
104 6.3 2.9 5.6 1.8 2
105 6.5 3 5.8 2.2 2
106 7.6 3 6.6 2.1 2
107 4.9 2.5 4.5 1.7 2
108 7.3 2.9 6.3 1.8 2
109 6.7 2.5 5.8 1.8 2
110 7.2 3.6 6.1 2.5 2
111 6.5 3.2 5.1 2 2
112 6.4 2.7 5.3 1.9 2
113 6.8 3 5.5 2.1 2
114 5.7 2.5 5 2 2
115 5.8 2.8 5.1 2.4 2
116 6.4 3.2 5.3 2.3 2
117 6.5 3 5.5 1.8 2
118 7.7 3.8 6.7 2.2 2
119 7.7 2.6 6.9 2.3 2
120 6 2.2 5 1.5 2
121 6.9 3.2 5.7 2.3 2
122 5.6 2.8 4.9 2 2
123 7.7 2.8 6.7 2 2
124 6.3 2.7 4.9 1.8 2
125 6.7 3.3 5.7 2.1 2
126 7.2 3.2 6 1.8 2
127 6.2 2.8 4.8 1.8 2
128 6.1 3 4.9 1.8 2
129 6.4 2.8 5.6 2.1 2
130 7.2 3 5.8 1.6 2
131 7.4 2.8 6.1 1.9 2
132 7.9 3.8 6.4 2 2
133 6.4 2.8 5.6 2.2 2
134 6.3 2.8 5.1 1.5 2
135 6.1 2.6 5.6 1.4 2
136 7.7 3 6.1 2.3 2
137 6.3 3.4 5.6 2.4 2
138 6.4 3.1 5.5 1.8 2
139 6 3 4.8 1.8 2
140 6.9 3.1 5.4 2.1 2
141 6.7 3.1 5.6 2.4 2
142 6.9 3.1 5.1 2.3 2
143 5.8 2.7 5.1 1.9 2
144 6.8 3.2 5.9 2.3 2
145 6.7 3.3 5.7 2.5 2
146 6.7 3 5.2 2.3 2
147 6.3 2.5 5 1.9 2
148 6.5 3 5.2 2 2
149 6.2 3.4 5.4 2.3 2
150 5.9 3 5.1 1.8 2
14 changes: 1 addition & 13 deletions benchmark/src/Benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,7 @@ int main(int argc, char **argv) {
SESAME::DataSinkPtr sinkPtr = SESAME::DataSinkFactory::create();

//Create Algorithm.
SESAME::AlgorithmPtr algoPtr = SESAME::AlgorithmFactory::create(cmd_params.algoName,
cmd_params.pointNumber,
cmd_params.clusterNumber,
cmd_params.dimension,
cmd_params.coresetSize,
cmd_params.seed,
cmd_params.lastArrivingNum,
cmd_params.timeWindow,
cmd_params.timeInterval,
cmd_params.onlineClusterNumber,
cmd_params.radiusFactor,
cmd_params.initBuffer,
cmd_params.offlineTimeWindow);
SESAME::AlgorithmPtr algoPtr = SESAME::AlgorithmFactory::create(cmd_params);

//Run algorithm producing results.
BenchmarkUtils::runBenchmark(cmd_params, sourcePtr, sinkPtr, algoPtr);
Expand Down
2 changes: 1 addition & 1 deletion include/Algorithm/Algorithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class Algorithm {
virtual void Initilize() = 0;
virtual void runOnlineClustering(SESAME::PointPtr input) = 0;
virtual void runOfflineClustering(SESAME::DataSinkPtr ptr) = 0;
void store(std::string outputPath, int numberOfCenters, int dimension, std::vector<PointPtr> results);
void store(std::string outputPath, int dimension, std::vector<PointPtr> results);
};
}

Expand Down
16 changes: 3 additions & 13 deletions include/Algorithm/AlgorithmFactory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,13 @@
#define SESAME_SRC_ALGORITHM_ALGORITHMFACTORY_HPP_

#include <Algorithm/Algorithm.hpp>
#include <Utils/BenchmarkUtils.hpp>

namespace SESAME {
class AlgorithmFactory {

public:
static SESAME::AlgorithmPtr create(std::string algoName,
int pointNumber,
int clusterNumber,
int dimension,
int coresetSize,
int seed,
int lastArrivingNum,
int timeWindow,
unsigned int timeInterval,
int onlineClusterNumber,
double radiusFactor,
int initBuffer,
int offlineTimeWindow);
static SESAME::AlgorithmPtr create(param_t &cmd_params);
};
}
#endif //SESAME_SRC_ALGORITHM_ALGORITHMFACTORY_HPP_
56 changes: 56 additions & 0 deletions include/Algorithm/Birch.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
//
// Created by tuidan on 2021/8/24.
//

#ifndef SESAME_INCLUDE_ALGORITHM_BIRCH_HPP_
#define SESAME_INCLUDE_ALGORITHM_BIRCH_HPP_
#include <Algorithm/Algorithm.hpp>
#include <Algorithm/WindowModel/LandmarkWindow.hpp>
#include <Algorithm/OfflineClustering/KMeans.hpp>
#include <Sinks/DataSink.hpp>
#include <Algorithm/DataStructure/CFTree.hpp>
namespace SESAME {

/// Tuning parameters for the Birch algorithm, added on top of the common
/// AlgorithmParameters base.
///
/// The three fields use the same names as the private configuration fields of
/// CFTree (B, L, T). Each one carries a default member initializer so that a
/// default-constructed BirchParameter never exposes indeterminate values;
/// callers are still expected to overwrite them with real settings.
class BirchParameter : public AlgorithmParameters {
 public:
  int maxInternalNodes = 0;        // B
  int maxLeafNodes = 0;            // L
  double thresholdDistance = 0.0;  // T
};

class Birch : public Algorithm {

public:
BirchParameter BirchParam;
std::shared_ptr<KMeans> kmeans; //used for offline initialization
int leafMask = 0;
NodePtr root;
vector<NodePtr> leafNodes;
CFTreePtr cfTree;
Birch();

~Birch();

void Initilize() override;

void runOnlineClustering(PointPtr input) override;

void runOfflineClustering(DataSinkPtr sinkPtr) override;
private:

void forwardInsert(PointPtr point);
void backwardEvolution(NodePtr &curNode, PointPtr &point);
void calculateCorDistance(vector<vector<double>> &distance, vector<NodePtr> &nodes);
double calculateRadius(PointPtr &point, PointPtr &centroid);
void selectChild(vector<NodePtr> &children, PointPtr &insertPoint, NodePtr &node);
double clusterToClusterDist(NodePtr &nodeA, NodePtr &nodeB);
void pointToClusterDist(PointPtr &insertPoint, NodePtr &node, double &dist);
void calculateCentroid(CFPtr &cf, PointPtr &centroid);
void updateNLS(NodePtr &node, PointPtr &point, bool updateAll);
void initializeCF(CFPtr &cf, int dimension);
void setCFToBlankNode(SESAME::NodePtr &curNode, SESAME::PointPtr &point);
void addNodeNLSToNode(SESAME::NodePtr &child, SESAME::NodePtr &parent);
void clearChildParents(vector<SESAME::NodePtr> &children);
};
}
#endif //SESAME_INCLUDE_ALGORITHM_BIRCH_HPP_
67 changes: 67 additions & 0 deletions include/Algorithm/DataStructure/CFTree.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
//
// Created by tuidan on 2021/8/24.
//

#ifndef SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_CFTREE_HPP_
#define SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_CFTREE_HPP_
#include <iostream>
#include <memory>
#include <vector>
#include <Algorithm/DataStructure/FeatureVector.hpp>
#include <Algorithm/DataStructure/Point.hpp>
namespace SESAME {

// Forward declarations of the CF-tree classes and the shared-pointer aliases
// (NodePtr, CFTreePtr) used to refer to them.
class CFNode;
class CFTree;
typedef std::shared_ptr<CFNode> NodePtr;
typedef std::shared_ptr<CFTree> CFTreePtr;

// Configuration holder for a CF tree: stores the three limits (B, L, T) and
// exposes them through getters and setters. Member functions are only declared
// here; their definitions are not part of this header.
class CFTree {
private:
int maxInternalNodes; // max CF number of each internal node
int maxLeafNodes; // max CF number of each leaf node
double thresholdDistance; // threshold radius of each sub cluster in leaf nodes
public:
// Takes the three limits as (b, l, t).
// NOTE(review): presumably b/l/t map onto the three fields above in order — confirm in the .cpp.
CFTree(int b, int l, double t);
// Same parameter list as the constructor; presumably (re)sets the limits — TODO confirm.
void initialTree(int b, int l, double t);
~CFTree();
// Read accessors; const, so they do not modify the tree.
int getB() const;
int getL() const;
double getT() const;
// Write accessors for the individual limits.
void setB(int b);
void setL(int l);
void setT(double t);
};


// One node of the CF tree. It owns a clustering feature (CFPtr), a leaf flag,
// lists of child and parent nodes, the points attached to it, and an index.
// Member functions are only declared here.
//
// NOTE(review): both `children` and `parent` hold NodePtr (std::shared_ptr).
// If a node appears in its child's `parent` list while also holding that child,
// the two keep each other alive; presumably clearParents()/removeChild() are
// meant to break such links — confirm against the implementation.
class CFNode {
private:
CFPtr curCF; // clustering feature held by this node
bool isLeaf;
std::vector<NodePtr> children;
std::vector<NodePtr> parent; // stored as a list even though the setter takes a single node
std:: vector<SESAME::PointPtr> clusterPoints;
int index;
public:
CFNode();
~CFNode();
SESAME::CFPtr getCF();
void setCF(SESAME::CFPtr &cf);
void insertPoint(PointPtr &p);
// The three vector getters below return by value, i.e. the caller receives a copy of the vector.
std::vector<SESAME::PointPtr> getPoints();
std::vector<SESAME::NodePtr> getParents();
int getIndex() const;
std::vector<SESAME::NodePtr> getChildren();
void removeChild(SESAME::NodePtr &child);
SESAME::NodePtr copy();
bool getIsLeaf();
void setIsLeaf(bool leaf);
// Takes a CFPtr despite the parameter being named `Node`.
void setNode(SESAME::CFPtr &Node);
void setIndex(int Index);
void setParent(SESAME::NodePtr &Parent);
void setChild(SESAME::NodePtr &child);
void clearParents();
};
}
#endif //SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_CFTREE_HPP_
5 changes: 5 additions & 0 deletions include/Algorithm/DataStructure/DataStructureFactory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
#include <Algorithm/DataStructure/CoresetTree.hpp>
#include <Algorithm/DataStructure/MicroCluster.hpp>
#include <Algorithm/DataStructure/Snapshot.hpp>
#include <Algorithm/DataStructure/CFTree.hpp>
#include <Algorithm/DataStructure/FeatureVector.hpp>

namespace SESAME {
class DataStructureFactory {

Expand All @@ -28,6 +31,8 @@ class DataStructureFactory {
static void clearMicroCluster(MicroClusterPtr microCluster);
static SnapshotPtr createSnapshot(MicroClusters & otherMicroClusters,int elapsedTime);
static void clearSnapshot(SnapshotPtr snapshot);
static CFTreePtr createCFTree();
static NodePtr createNode();

};
}
Expand Down
34 changes: 34 additions & 0 deletions include/Algorithm/DataStructure/FeatureVector.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
//
// Created by tuidan on 2021/8/24.
//

#ifndef SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_FEATUREVECTOR_H_
#define SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_FEATUREVECTOR_H_
#include <memory>
#include <vector>
#include <iostream>

namespace SESAME {
// Forward declaration and shared-pointer alias for the clustering feature type.
class CF;
typedef std::shared_ptr<CF> CFPtr;
// Clustering feature (CF): a node count plus two per-dimension vectors, LS and SS.
// Member functions are only declared here.
class CF {
private:
// N is the number of nodes in the sub-cluster, LS is the linear sum of those N nodes,
// and SS is the squared sum of those N nodes.
int NumberOfNodes;
std::vector<double> LS;
std::vector<double> SS;
public:
CF();
~CF();
int getN() const;
void setN(int n);
// Whole-vector getters return by value, i.e. the caller receives a copy.
std::vector<double> getLS() const;
std::vector<double> getSS() const;
// Single-component accessors; whether `index` is bounds-checked is not visible here.
double getLSItem(int index) const;
double getSSItem(int index) const;
void setLS(std::vector<double> &newLs);
void setSS(std::vector<double> &newSs);
SESAME::CFPtr copy();
};
}
#endif //SESAME_INCLUDE_ALGORITHM_DATASTRUCTURE_FEATUREVECTOR_H_
1 change: 1 addition & 0 deletions include/Algorithm/DataStructure/Point.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class Point {
void setClusteringCenter(int index);
int getDimension() const;
void setDimension(int d);
int getFeatureLength();
SESAME::PointPtr copy();
};
}
Expand Down
Loading

0 comments on commit 7be4f80

Please sign in to comment.