-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Subtract connection based offline refinement. which can be used later…
… in refactor part (#76)
- Loading branch information
1 parent
4f1d8fd
commit a730991
Showing
5 changed files
with
233 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
// | ||
// Created by 1124a on 2021/11/23. | ||
// | ||
|
||
#ifndef SESAME_INCLUDE_ALGORITHM_OFFLINECLUSTERING_CONNECTEDREGIONS_HPP_ | ||
#define SESAME_INCLUDE_ALGORITHM_OFFLINECLUSTERING_CONNECTEDREGIONS_HPP_ | ||
|
||
#include <Algorithm/DataStructure/WeightedAdjacencyList.hpp> | ||
#include <Algorithm/DataStructure/MicroCluster.hpp> | ||
#include <Algorithm/DataStructure/Point.hpp> | ||
#include <Algorithm/OfflineClustering/OfflineClustering.hpp> | ||
#include <Algorithm/DataStructure/DataStructureFactory.hpp> | ||
#include <Utils/Logger.hpp> | ||
namespace SESAME { | ||
class ConnectedRegions : public SESAME::OfflineClustering { | ||
public: | ||
double alpha; //intersection factor, alpha | ||
double weightMin; //minimum weight | ||
std::vector<std::vector<MicroClusterPtr>> finalClusters; | ||
unordered_map<int,std::vector<int>> connecvtivityGraphId; | ||
ConnectedRegions(); | ||
ConnectedRegions(double alpha, double weightMin); | ||
void connection( std::vector<MicroClusterPtr>& microClusters, | ||
SESAME::WeightedAdjacencyList weightedAdjacencyList); | ||
std::vector<PointPtr> ResultsToDataSink(); | ||
|
||
/** | ||
* @Description: insert vertices and entries into connectivity graph when micro cluster pair | ||
* connectivity value greater than the intersection threshold | ||
* if the graph has testing micro cluster, add connected strong MC in the corresponding entries | ||
* else, create new V,E into the graph | ||
* @Param: connectivity graph, micro cluster 1 and 2 | ||
* @Return: void | ||
*/ | ||
void insertIntoGraph( std::vector<MicroClusterPtr> microClusters, | ||
int microClusterId,int OtherId); | ||
void insertIntoGraph( std::vector<MicroClusterPtr> microClusters, | ||
int microClusterId); | ||
/** | ||
* @Description: findConnectedComponents function visit the existing connectivity graph | ||
* and find all connected strong MCs that will finally form arbitrary-shaped macro clusters | ||
* each macro cluster will be stored as a vector of micro clusters, which will be transformed into | ||
* point that stores in sink later | ||
* @Param: connectivity graph | ||
* @Return: void | ||
*/ | ||
void findConnectedComponents(std::vector<MicroClusterPtr> microClusters); | ||
|
||
}; | ||
} | ||
|
||
#endif //SESAME_INCLUDE_ALGORITHM_OFFLINECLUSTERING_CONNECTEDREGIONS_HPP_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,6 @@ | ||
# Source files registered through add_source_sesame.
add_source_sesame(
    OfflineClustering.cpp
    KMeans.cpp
    DBSCAN.cpp
    ConnectedRegions.cpp
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
// | ||
// Created by 1124a on 2021/11/23. | ||
// | ||
|
||
#include <Algorithm/OfflineClustering/ConnectedRegions.hpp> | ||
/**
 * Default constructor. Zero-initializes both thresholds so that a later read
 * (connection() logs and compares them) never sees an indeterminate value.
 */
SESAME::ConnectedRegions::ConnectedRegions() : alpha(0), weightMin(0) {
}
/**
 * Construct with explicit thresholds.
 * @Param: alpha intersection factor, weightMin minimum weight
 */
SESAME::ConnectedRegions::ConnectedRegions(double alpha, double weightMin)
    : alpha(alpha), weightMin(weightMin) {
}
void SESAME::ConnectedRegions::connection( std::vector<MicroClusterPtr>& microClusters, | ||
|
||
SESAME::WeightedAdjacencyList weightedAdjacencyList) { | ||
SESAME_INFO("alpha."<<alpha<<" weightMin is "<<weightMin); | ||
WeightedAdjacencyList::iterator iterW; | ||
for (iterW = weightedAdjacencyList.begin(); iterW != weightedAdjacencyList.end(); iterW++){ | ||
if (iterW->first.microCluster1->weight >= weightMin &&iterW->first.microCluster2->weight >= weightMin){ | ||
double val = 2*iterW->second->weight / (iterW->first.microCluster1->weight+iterW->first.microCluster2->weight); | ||
if (val > alpha) { | ||
insertIntoGraph( microClusters, | ||
iterW->first.microCluster1->id.front(), | ||
iterW->first.microCluster2->id.front()); | ||
|
||
insertIntoGraph(microClusters, | ||
iterW->first.microCluster2->id.front(), | ||
iterW->first.microCluster1->id.front()); | ||
|
||
} | ||
else | ||
{ | ||
insertIntoGraph(microClusters, | ||
iterW->first.microCluster1->id.front()); | ||
insertIntoGraph(microClusters, | ||
iterW->first.microCluster2->id.front()); | ||
} | ||
} | ||
} | ||
findConnectedComponents(microClusters); | ||
} | ||
/** | ||
* @Description: insert vertices and entries into connectivity graph when micro cluster pair | ||
* connectivity value greater than the intersection threshold | ||
* if the graph has testing micro cluster, add connected strong MC in the corresponding entries | ||
* else, create new V,E into the graph | ||
* @Param: connectivity graph, micro cluster 1 and 2 | ||
* @Return: void | ||
*/ | ||
|
||
|
||
void SESAME::ConnectedRegions::insertIntoGraph( std::vector<MicroClusterPtr> microClusters, | ||
int microClusterId,int OtherId){ | ||
if (connecvtivityGraphId.find(microClusterId)!=connecvtivityGraphId.end()) | ||
{ | ||
if(std::find(connecvtivityGraphId[microClusterId].begin(),connecvtivityGraphId[microClusterId].end(),OtherId)==connecvtivityGraphId[microClusterId].end()) | ||
connecvtivityGraphId[microClusterId].push_back(OtherId); | ||
} else{ | ||
auto microCluster = std::find_if(microClusters.begin(), microClusters.end(),SESAME::finderMicroCluster(microClusterId)); | ||
(*microCluster)->visited=false; | ||
std::vector<int> newMicroClusterIdSet; | ||
newMicroClusterIdSet.push_back(OtherId); | ||
connecvtivityGraphId.insert(make_pair(microClusterId,OtherId)); | ||
} | ||
} | ||
|
||
|
||
void SESAME::ConnectedRegions::insertIntoGraph( std::vector<MicroClusterPtr> microClusters, | ||
int microClusterId){ | ||
if (connecvtivityGraphId.find(microClusterId)==connecvtivityGraphId.end()) | ||
{ | ||
auto microCluster = std::find_if(microClusters.begin(), microClusters.end(), | ||
SESAME::finderMicroCluster(microClusterId)); | ||
(*microCluster)->visited=false; | ||
std::vector<int> newMicroClusterIdSet; | ||
connecvtivityGraphId.insert(make_pair(microClusterId,newMicroClusterIdSet)); | ||
} | ||
|
||
} | ||
|
||
/**
 * @Description: findConnectedComponents function visits the existing connectivity graph
 * and collects, for every not yet visited vertex, all micro clusters reachable from it
 * each such group is appended to finalClusters as one macro cluster
 * every collected micro cluster (including the starting one) is marked visited, so a
 * micro cluster ends up in at most one macro cluster
 * ids that cannot be found in microClusters are skipped
 * @Param: micro clusters referenced by the connectivity graph
 * @Return: void
 */
void SESAME::ConnectedRegions::findConnectedComponents(std::vector<SESAME::MicroClusterPtr> microClusters) {
  for (const auto& vertex : connecvtivityGraphId) {
    auto seed = std::find_if(microClusters.begin(), microClusters.end(),
                             SESAME::finderMicroCluster(vertex.first));
    if (seed == microClusters.end() || (*seed)->visited) {
      continue;
    }

    // Mark the seed as well, otherwise a later vertex could pull it into a second cluster.
    (*seed)->visited = true;
    std::vector<SESAME::MicroClusterPtr> newCluster;
    newCluster.push_back(*seed);

    // Depth-first traversal over the adjacency lists, starting from the seed.
    std::vector<int> pending(1, vertex.first);
    while (!pending.empty()) {
      const int currentId = pending.back();
      pending.pop_back();

      auto adjacency = connecvtivityGraphId.find(currentId);
      if (adjacency == connecvtivityGraphId.end()) {
        continue;
      }
      for (int neighbourId : adjacency->second) {
        auto neighbour = std::find_if(microClusters.begin(), microClusters.end(),
                                      SESAME::finderMicroCluster(neighbourId));
        if (neighbour == microClusters.end() || (*neighbour)->visited) {
          continue;
        }
        (*neighbour)->visited = true;
        newCluster.push_back(*neighbour);
        pending.push_back(neighbourId);
      }
    }
    this->finalClusters.push_back(newCluster);
  }
}
|
||
std::vector<SESAME::PointPtr> SESAME::ConnectedRegions::ResultsToDataSink(){ | ||
// SESAME_INFO("Start resize "<<finalClusters.size()); | ||
std::vector<SESAME::PointPtr> points; | ||
for(auto iter=0; iter!=finalClusters.size();iter++) | ||
{ //initialize pseudo point of macro clusters | ||
PointPtr point = DataStructureFactory::createPoint(iter, 0, finalClusters.at(iter).front()->dimension, 0); | ||
//This is just for testing, need to delete | ||
std::vector<double> centroid(finalClusters.at(iter).front()->dimension,0); | ||
for(auto j=0; j!=finalClusters.at(iter).size();j++) | ||
{ | ||
double currentWeight=point->getWeight()+finalClusters.at(iter).at(j)->weight; | ||
point->setWeight(currentWeight); | ||
for(auto a =0;a<finalClusters.at(iter).at(j)->dimension;a++) | ||
{ | ||
if(j==0) | ||
point->setFeatureItem(0,a); | ||
point->setFeatureItem(point->getFeatureItem(a)+finalClusters.at(iter).at(j)->centroid.at(a),a); | ||
centroid[a]=point->getFeatureItem(a);//testing | ||
if(j==finalClusters.at(iter).size()-1) | ||
{ | ||
point->setFeatureItem(point->getFeatureItem(a)/finalClusters.at(iter).at(j)->dimension,a); | ||
centroid[a] =centroid[a]/finalClusters.at(iter).at(j)->dimension;//testing | ||
} | ||
} | ||
} | ||
points.push_back(point); | ||
// std::stringstream results; | ||
// std::copy(centroid.begin(),centroid.end(),std::ostream_iterator<double>(results, " ")); | ||
// SESAME_INFO("The NO."<<iter<<" Centroid is "<<results.str()); | ||
} | ||
return points; | ||
} |