-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
32 changed files
with
1,094 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# ---------------------------------------------------------------------------
# Leiden community detection query module
# ---------------------------------------------------------------------------

# Translation units that make up the module.
set(leiden_community_detection_module_src
    leiden_community_detection_module.cpp
    algorithm/leiden.cpp
    leiden_utils/leiden_utils.cpp)

# Boost is looked up in config mode only; Boost_INCLUDE_DIRS is added to the
# module's include path below.
# NOTE(review): the lookup is not REQUIRED although leiden_utils.hpp includes
# Boost headers - confirm whether a missing Boost package should fail here.
find_package(Boost CONFIG)

add_query_module(leiden_community_detection 1 "${leiden_community_detection_module_src}")

target_include_directories(leiden_community_detection
                           PRIVATE
                           ${CMAKE_CURRENT_SOURCE_DIR}
                           ${Boost_INCLUDE_DIRS})

target_link_libraries(leiden_community_detection PRIVATE mg_utility)
561 changes: 561 additions & 0 deletions
561
cpp/leiden_community_detection_module/algorithm/leiden.cpp
Large diffs are not rendered by default.
Oops, something went wrong.
24 changes: 24 additions & 0 deletions
24
cpp/leiden_community_detection_module/algorithm/leiden.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#pragma once | ||
|
||
#include <cstdint> | ||
#include <vector> | ||
|
||
#include <mg_procedure.h> | ||
#include <mg_exceptions.hpp> | ||
#include <mg_graph.hpp> | ||
|
||
namespace leiden_alg { | ||
/// | ||
/// @brief Performs the Leiden community detection algorithm on the given graph. | ||
/// | ||
/// @param graph The graph on which to perform community detection. | ||
/// @param gamma Parameter that controls the resolution of the algorithm. | ||
/// @param theta Parameter that adjusts merging of communities based on modularity. | ||
/// @param resolution_parameter Controls the granularity of the detected communities. | ||
/// @param max_iterations The maximum number of iterations the algorithm will run. | ||
/// @return A vector of vectors where each vector represents a community hierarchy for a node. | ||
/// | ||
std::vector<std::vector<std::uint64_t>> GetCommunities(const mg_graph::GraphView<> &graph, double gamma, double theta, | ||
double resolution_parameter, std::uint64_t max_iterations); | ||
|
||
} // namespace leiden_alg |
142 changes: 142 additions & 0 deletions
142
cpp/leiden_community_detection_module/leiden_community_detection_module.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <limits>
#include <memory>
#include <stdexcept>
#include <vector>

#include <mg_exceptions.hpp>
#include <mg_utils.hpp>

#include "_mgp.hpp"
#include "algorithm/leiden.hpp"
#include "mg_procedure.h"
|
||
|
||
namespace { | ||
|
||
const char *kProcedureGet = "get"; | ||
const char *kProcedureGetSubgraph = "get_subgraph"; | ||
const char *kArgumentSubgraphNodes = "subgraph_nodes"; | ||
const char *kArgumentSubgraphRelationships = "subgraph_relationships"; | ||
|
||
const char *kFieldNode = "node"; | ||
const char *kFieldCommunity = "community_id"; | ||
const char *kFieldCommunities = "communities"; | ||
const char *kDefaultWeightProperty = "weight"; | ||
const double kDefaultGamma = 1.0; | ||
const double kDefaultTheta = 0.01; | ||
const double kDefaultResolutionParameter = 0.01; | ||
const std::uint64_t kDefaultMaxIterations = std::numeric_limits<std::uint64_t>::max(); | ||
|
||
void InsertLeidenRecord(mgp_graph *graph, mgp_result *result, mgp_memory *memory, const std::uint64_t node_id, | ||
const std::vector<std::uint64_t> &community) { | ||
auto *vertex = mg_utility::GetNodeForInsertion(node_id, graph, memory); | ||
if (!vertex) return; | ||
|
||
mgp_result_record *record = mgp::result_new_record(result); | ||
if (record == nullptr) throw mg_exception::NotEnoughMemoryException(); | ||
|
||
mg_utility::InsertNodeValueResult(record, kFieldNode, vertex, memory); | ||
mg_utility::InsertIntValueResult(record, kFieldCommunity, community.back(), memory); | ||
|
||
auto *community_list = mgp::list_make_empty(0, memory); | ||
for (const auto &community_id : community) { | ||
mgp::list_append_extend(community_list, mgp::value_make_int(community_id, memory)); | ||
} | ||
|
||
mg_utility::InsertListValueResult(record, kFieldCommunities, community_list, memory); | ||
} | ||
|
||
void LeidenCommunityDetection(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory, bool subgraph) { | ||
auto index = 0; | ||
mgp_list *subgraph_nodes = nullptr; | ||
mgp_list *subgraph_relationships = nullptr; | ||
if (subgraph) { | ||
subgraph_nodes = mgp::value_get_list(mgp::list_at(args, index++)); | ||
subgraph_relationships = mgp::value_get_list(mgp::list_at(args, index++)); | ||
} | ||
const auto *weight_property = mgp::value_get_string(mgp::list_at(args, index++)); | ||
const auto gamma = mgp::value_get_double(mgp::list_at(args, index++)); | ||
const auto theta = mgp::value_get_double(mgp::list_at(args, index++)); | ||
const auto resolution_parameter = mgp::value_get_double(mgp::list_at(args, index++)); | ||
const auto max_iterations = mgp::value_get_int(mgp::list_at(args, index++)); | ||
|
||
const auto graph = subgraph | ||
? mg_utility::GetWeightedSubgraphView(memgraph_graph, result, memory, subgraph_nodes, subgraph_relationships, mg_graph::GraphType::kUndirectedGraph, weight_property, 1.0) | ||
: mg_utility::GetWeightedGraphView(memgraph_graph, result, memory, mg_graph::GraphType::kUndirectedGraph, weight_property, 1.0); | ||
auto communities = leiden_alg::GetCommunities(*graph, gamma, theta, resolution_parameter, max_iterations); | ||
if (communities.empty()) return; | ||
|
||
for (std::size_t i = 0; i < communities.size(); i++) { | ||
InsertLeidenRecord(memgraph_graph, result, memory, graph->GetMemgraphNodeId(i), communities[i]); | ||
} | ||
} | ||
|
||
void OnGraph(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) { | ||
try { | ||
LeidenCommunityDetection(args, memgraph_graph, result, memory, false); | ||
} catch (const std::exception &e) { | ||
mgp::result_set_error_msg(result, e.what()); | ||
return; | ||
} | ||
} | ||
|
||
void OnSubgraph(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) { | ||
try { | ||
LeidenCommunityDetection(args, memgraph_graph, result, memory, true); | ||
} catch (const std::exception &e) { | ||
mgp::result_set_error_msg(result, e.what()); | ||
return; | ||
} | ||
} | ||
|
||
|
||
extern "C" int mgp_init_module(mgp_module *module, mgp_memory *memory) { | ||
try { | ||
auto *const default_weight_property = mgp::value_make_string(kDefaultWeightProperty, memory); | ||
auto *const default_gamma = mgp::value_make_double(kDefaultGamma, memory); | ||
auto *const default_theta = mgp::value_make_double(kDefaultTheta, memory); | ||
auto *const default_resolution_parameter = mgp::value_make_double(kDefaultResolutionParameter, memory); | ||
auto *const default_max_iterations = mgp::value_make_int(kDefaultMaxIterations, memory); | ||
|
||
mgp::MemoryDispatcherGuard guard{memory}; | ||
{ | ||
auto *proc = mgp::module_add_read_procedure(module, kProcedureGet, OnGraph); | ||
mgp::proc_add_opt_arg(proc, "weight_property", mgp::type_string(), default_weight_property); | ||
mgp::proc_add_opt_arg(proc, "gamma", mgp::type_float(), default_gamma); | ||
mgp::proc_add_opt_arg(proc, "theta", mgp::type_float(), default_theta); | ||
mgp::proc_add_opt_arg(proc, "resolution_parameter", mgp::type_float(), default_resolution_parameter); | ||
mgp::proc_add_opt_arg(proc, "number_of_iterations", mgp::type_int(), default_max_iterations); | ||
|
||
mgp::proc_add_result(proc, kFieldNode, mgp::type_node()); | ||
mgp::proc_add_result(proc, kFieldCommunity, mgp::type_int()); | ||
mgp::proc_add_result(proc, kFieldCommunities, mgp::type_list(mgp::type_int())); | ||
} | ||
|
||
{ | ||
auto *proc = mgp::module_add_read_procedure(module, kProcedureGetSubgraph, OnSubgraph); | ||
|
||
mgp::proc_add_arg(proc, kArgumentSubgraphNodes, mgp::type_list(mgp::type_node())); | ||
mgp::proc_add_arg(proc, kArgumentSubgraphRelationships, mgp::type_list(mgp::type_relationship())); | ||
mgp::proc_add_opt_arg(proc, "weight_property", mgp::type_string(), default_weight_property); | ||
mgp::proc_add_opt_arg(proc, "gamma", mgp::type_float(), default_gamma); | ||
mgp::proc_add_opt_arg(proc, "theta", mgp::type_float(), default_theta); | ||
mgp::proc_add_opt_arg(proc, "resolution_parameter", mgp::type_float(), default_resolution_parameter); | ||
mgp::proc_add_opt_arg(proc, "number_of_iterations", mgp::type_int(), default_max_iterations); | ||
|
||
mgp::proc_add_result(proc, kFieldNode, mgp::type_node()); | ||
mgp::proc_add_result(proc, kFieldCommunity, mgp::type_int()); | ||
mgp::proc_add_result(proc, kFieldCommunities, mgp::type_list(mgp::type_int())); | ||
} | ||
|
||
mgp::value_destroy(default_gamma); | ||
mgp::value_destroy(default_theta); | ||
mgp::value_destroy(default_weight_property); | ||
} catch (const std::exception &e) { | ||
return 1; | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
|
||
|
||
extern "C" int mgp_shutdown_module() { return 0; } | ||
|
||
} // namespace |
20 changes: 20 additions & 0 deletions
20
cpp/leiden_community_detection_module/leiden_utils/leiden_utils.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#include "leiden_utils.hpp" | ||
#include <cstdint> | ||
namespace leiden_alg { | ||
|
||
// create new intermediary community ids -> nodes that are in community i are children of the new intermediary community | ||
// id | ||
void CreateIntermediaryCommunities(Dendrogram &intermediary_communities, | ||
const std::vector<std::vector<std::uint64_t>> &communities, | ||
std::uint64_t current_level) { | ||
for (std::uint64_t i = 0; i < communities.size(); i++) { | ||
const auto new_intermediary_community_id = | ||
std::make_shared<IntermediaryCommunityId>(IntermediaryCommunityId{i, current_level + 1, nullptr}); | ||
for (const auto &node_id : communities[i]) { | ||
intermediary_communities[current_level][node_id]->parent = new_intermediary_community_id; | ||
} | ||
intermediary_communities[current_level + 1].push_back(new_intermediary_community_id); | ||
} | ||
} | ||
|
||
} // namespace leiden_alg |
143 changes: 143 additions & 0 deletions
143
cpp/leiden_community_detection_module/leiden_utils/leiden_utils.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
#pragma once | ||
|
||
#include <memory> | ||
#include <vector> | ||
|
||
#include <boost/unordered_map.hpp> | ||
#include <boost/unordered_set.hpp> | ||
|
||
namespace leiden_alg { | ||
|
||
///
/// @brief Weighted graph kept as one adjacency list per node plus one weight per node.
///
/// Node identifiers are zero-based positions in both containers. None of the accessors checks its
/// node identifier; passing one that is not smaller than Size() is the caller's error.
///
struct Graph {
  Graph() = default;

  /// @brief Creates a graph of `size` nodes without edges; every node weight starts at 0.0.
  explicit Graph(std::uint64_t size) {
    adjacency_list.resize(size);
    node_weights.assign(size, 0.0);
  }

  ///
  /// @brief Appends the directed edge u -> v to the adjacency list of u.
  ///
  /// @param u           Source node.
  /// @param v           Destination node.
  /// @param edge_weight Weight stored with the edge (1.0 when omitted).
  ///
  void AddEdge(std::uint64_t u, std::uint64_t v, double edge_weight = 1.0) {
    auto &outgoing = adjacency_list[u];
    outgoing.push_back({v, edge_weight});
  }

  ///
  /// @brief Tells whether `u` is a valid node identifier for this graph.
  ///
  /// @param u Node identifier to test.
  /// @return True when `u` is smaller than the number of nodes.
  ///
  bool IsVertexInGraph(std::uint64_t u) const { return u < Size(); }

  ///
  /// @brief Number of nodes in the graph.
  ///
  /// @return Count of adjacency lists held by the graph.
  ///
  std::size_t Size() const { return adjacency_list.size(); }

  ///
  /// @brief Gives read access to the outgoing edges of a node.
  ///
  /// @param node_id Node whose edges are requested.
  /// @return Reference to the (neighbor, edge weight) pairs stored for `node_id`.
  ///
  const std::vector<std::pair<std::uint64_t, double>> &Neighbors(std::uint64_t node_id) const {
    const auto &outgoing = adjacency_list[node_id];
    return outgoing;
  }

  ///
  /// @brief Adds `weight` to the weight already stored for a node.
  ///
  /// @param node_id Node whose weight is increased.
  /// @param weight  Amount added to the current weight (1.0 when omitted).
  ///
  void UpdateNodeWeight(std::uint64_t node_id, double weight = 1.0) {
    double &stored = node_weights[node_id];
    stored += weight;
  }

  ///
  /// @brief Reads the weight stored for a node.
  ///
  /// @param node_id Node whose weight is requested.
  /// @return Current weight of the node.
  ///
  double GetNodeWeight(std::uint64_t node_id) const {
    const double stored = node_weights[node_id];
    return stored;
  }

  std::vector<std::vector<std::pair<std::uint64_t, double>>> adjacency_list;  ///< node_id -> (neighbor_id, edge_weight)
  std::vector<double> node_weights;                                           ///< node_id -> node_weight
};
|
||
///
/// @brief Assignment of nodes to communities, stored in both directions.
///
/// `communities` answers "which nodes are in community c", `community_id` answers "which community
/// holds node n". The struct does not keep the two views in sync; whoever fills it is responsible.
///
struct Partitions {
  std::vector<std::vector<std::uint64_t>> communities;  ///< community_id -> node_ids within the community.
  std::vector<std::uint64_t> community_id;              ///< node_id -> community_id.

  ///
  /// @brief Looks up the community a node is assigned to.
  ///
  /// @param node_id Node to look up; not range-checked.
  /// @return Community identifier stored for the node.
  ///
  std::uint64_t GetCommunityForNode(std::uint64_t node_id) const {
    const std::uint64_t assigned = community_id[node_id];
    return assigned;
  }

  ///
  /// @brief Reports the weight of a community, defined here as its member count.
  ///
  /// @param community_id Community to look up; not range-checked. The parameter shadows the data
  ///                     member of the same name inside this function.
  /// @return Number of nodes listed for the community.
  ///
  std::uint64_t GetCommunityWeight(std::uint64_t community_id) const {
    const auto &members = communities[community_id];
    return members.size();
  }
};
|
||
///
/// @brief One node of the dendrogram built during hierarchical community detection.
///
/// Each node records:
/// - community_id: the identifier of the community within its level,
/// - level: the dendrogram level the node belongs to,
/// - parent: the community this node was merged into. CreateIntermediaryCommunities links a node of
///   level L to a parent of level L + 1; the pointer is null while no parent has been assigned.
///
/// All members have defaults, so a default-constructed node is well defined (id 0, level 0, no parent).
///
struct IntermediaryCommunityId {
  std::uint64_t community_id{0};                             ///< The identifier of the community.
  std::uint64_t level{0};                                    ///< The level of this community in the hierarchy.
  std::shared_ptr<IntermediaryCommunityId> parent{nullptr};  ///< Shared pointer to the parent community, if any.
};

///
/// @brief Alias for a dendrogram, a hierarchical structure of communities.
///
/// The outer vector is indexed by level; the inner vector holds shared pointers to the nodes of that
/// level. Nodes of one level refer to nodes of the next level through their `parent` pointer.
///
using Dendrogram = std::vector<std::vector<std::shared_ptr<IntermediaryCommunityId>>>;

///
/// @brief Creates the dendrogram nodes of level `current_level + 1` from a set of communities.
///
/// For every index i of `communities` a node with community_id i and level `current_level + 1` is
/// appended to level `current_level + 1`, and it is set as the parent of every node of level
/// `current_level` whose position is listed in `communities[i]`.
///
/// @param intermediary_communities Dendrogram that is updated. It must already contain level
///                                 `current_level` with a node at every position listed in `communities`.
/// @param communities              community index -> positions of its member nodes within level `current_level`.
/// @param current_level            Level whose nodes receive their parents.
///
void CreateIntermediaryCommunities(Dendrogram &intermediary_communities,
                                   const std::vector<std::vector<std::uint64_t>> &communities,
                                   std::uint64_t current_level);
|
||
} // namespace leiden_alg |
Oops, something went wrong.