Skip to content

Commit

Permalink
Expose threshold in louvain (rapidsai#3792)
Browse files Browse the repository at this point in the history
The threshold parameter (called `epsilon` in most of the centrality measures) defines when to stop the iterative steps of Louvain at a given level. Once the modularity increase produced by an iteration no longer exceeds the threshold, we stop iterating at that level and start coarsening the graph.

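This parameter was hard-coded (as `0.0001`) in the initial C++ implementation of Louvain. This PR exposes it through the C++, C API, pylibcugraph (PLC), and Python layers. Note that the new C++ default is `1e-7` rather than the previously hard-coded value, so callers relying on the default get a stricter convergence criterion.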

The PR also renames the Python parameter `max_iter` to `max_level`, which better reflects its meaning: it bounds the number of levels in the hierarchy, not the number of iterations within a level.

Closes rapidsai#3791

Authors:
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)
  - Naim (https://github.com/naimnv)
  - Joseph Nke (https://github.com/jnke2016)
  - Rick Ratzel (https://github.com/rlratzel)

URL: rapidsai#3792
  • Loading branch information
ChuckHastings authored Sep 6, 2023
1 parent 6b57f56 commit 98324ac
Show file tree
Hide file tree
Showing 15 changed files with 228 additions and 51 deletions.
4 changes: 4 additions & 0 deletions cpp/include/cugraph/algorithms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,8 @@ weight_t hungarian(raft::handle_t const& handle,
* @param[in] graph input graph object
* @param[out] clustering Pointer to device array where the clustering should be stored
* @param[in] max_level (optional) maximum number of levels to run (default 100)
* @param[in] threshold (optional) threshold for convergence at each level (default
* 1e-7)
* @param[in] resolution (optional) The value of the resolution parameter to use.
* Called gamma in the modularity formula, this changes the size
* of the communities. Higher resolutions lead to more smaller
Expand All @@ -607,6 +609,7 @@ std::pair<size_t, weight_t> louvain(
std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
vertex_t* clustering,
size_t max_level = 100,
weight_t threshold = weight_t{1e-7},
weight_t resolution = weight_t{1});

template <typename vertex_t, typename edge_t, typename weight_t>
Expand Down Expand Up @@ -652,6 +655,7 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> louvain(
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
size_t max_level = 100,
weight_t threshold = weight_t{1e-7},
weight_t resolution = weight_t{1});

/**
Expand Down
2 changes: 2 additions & 0 deletions cpp/include/cugraph_c/community_algorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ typedef struct {
* @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage
* needs to be transposed
* @param [in] max_level Maximum level in hierarchy
* @param [in] threshold Threshold parameter, defines convergence at each level of hierarchy
* @param [in] resolution Resolution parameter (gamma) in modularity formula.
* This changes the size of the communities. Higher resolutions
* lead to more smaller communities, lower resolutions lead to
Expand All @@ -107,6 +108,7 @@ typedef struct {
cugraph_error_code_t cugraph_louvain(const cugraph_resource_handle_t* handle,
cugraph_graph_t* graph,
size_t max_level,
double threshold,
double resolution,
bool_t do_expensive_check,
cugraph_hierarchical_clustering_result_t** result,
Expand Down
7 changes: 6 additions & 1 deletion cpp/src/c_api/louvain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,22 @@ struct louvain_functor : public cugraph::c_api::abstract_functor {
raft::handle_t const& handle_;
cugraph::c_api::cugraph_graph_t* graph_;
size_t max_level_;
double threshold_;
double resolution_;
bool do_expensive_check_;
cugraph::c_api::cugraph_hierarchical_clustering_result_t* result_{};

louvain_functor(::cugraph_resource_handle_t const* handle,
::cugraph_graph_t* graph,
size_t max_level,
double threshold,
double resolution,
bool do_expensive_check)
: abstract_functor(),
handle_(*reinterpret_cast<cugraph::c_api::cugraph_resource_handle_t const*>(handle)->handle_),
graph_(reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)),
max_level_(max_level),
threshold_(threshold),
resolution_(resolution),
do_expensive_check_(do_expensive_check)
{
Expand Down Expand Up @@ -102,6 +105,7 @@ struct louvain_functor : public cugraph::c_api::abstract_functor {
.view()),
clusters.data(),
max_level_,
static_cast<weight_t>(threshold_),
static_cast<weight_t>(resolution_));

rmm::device_uvector<vertex_t> vertices(graph_view.local_vertex_partition_range_size(),
Expand All @@ -121,12 +125,13 @@ struct louvain_functor : public cugraph::c_api::abstract_functor {
extern "C" cugraph_error_code_t cugraph_louvain(const cugraph_resource_handle_t* handle,
cugraph_graph_t* graph,
size_t max_level,
double threshold,
double resolution,
bool_t do_expensive_check,
cugraph_hierarchical_clustering_result_t** result,
cugraph_error_t** error)
{
louvain_functor functor(handle, graph, max_level, resolution, do_expensive_check);
louvain_functor functor(handle, graph, max_level, threshold, resolution, do_expensive_check);

return cugraph::c_api::run_algorithm(graph, functor, result, error);
}
9 changes: 6 additions & 3 deletions cpp/src/community/louvain_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> louvain(
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
size_t max_level,
weight_t threshold,
weight_t resolution)
{
using graph_t = cugraph::graph_t<vertex_t, edge_t, false, multi_gpu>;
Expand Down Expand Up @@ -169,7 +170,7 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> louvain(
// during each iteration of the loop
bool up_down = true;

while (new_Q > (cur_Q + 0.0001)) {
while (new_Q > (cur_Q + threshold)) {
cur_Q = new_Q;

next_clusters_v = detail::update_clustering_by_delta_modularity(handle,
Expand Down Expand Up @@ -291,12 +292,13 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> louvain(
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
size_t max_level,
weight_t threshold,
weight_t resolution)
{
CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");

CUGRAPH_EXPECTS(edge_weight_view.has_value(), "Graph must be weighted");
return detail::louvain(handle, graph_view, edge_weight_view, max_level, resolution);
return detail::louvain(handle, graph_view, edge_weight_view, max_level, threshold, resolution);
}

template <typename vertex_t, typename edge_t, bool multi_gpu>
Expand All @@ -317,6 +319,7 @@ std::pair<size_t, weight_t> louvain(
std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
vertex_t* clustering,
size_t max_level,
weight_t threshold,
weight_t resolution)
{
CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");
Expand All @@ -328,7 +331,7 @@ std::pair<size_t, weight_t> louvain(
weight_t modularity;

std::tie(dendrogram, modularity) =
detail::louvain(handle, graph_view, edge_weight_view, max_level, resolution);
detail::louvain(handle, graph_view, edge_weight_view, max_level, threshold, resolution);

detail::flatten_dendrogram(handle, graph_view, *dendrogram, clustering);

Expand Down
14 changes: 13 additions & 1 deletion cpp/src/community/louvain_mg.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -25,36 +25,42 @@ template std::pair<std::unique_ptr<Dendrogram<int32_t>>, float> louvain(
graph_view_t<int32_t, int32_t, false, true> const&,
std::optional<edge_property_view_t<int32_t, float const*>>,
size_t,
float,
float);
template std::pair<std::unique_ptr<Dendrogram<int32_t>>, float> louvain(
raft::handle_t const&,
graph_view_t<int32_t, int64_t, false, true> const&,
std::optional<edge_property_view_t<int64_t, float const*>>,
size_t,
float,
float);
template std::pair<std::unique_ptr<Dendrogram<int64_t>>, float> louvain(
raft::handle_t const&,
graph_view_t<int64_t, int64_t, false, true> const&,
std::optional<edge_property_view_t<int64_t, float const*>>,
size_t,
float,
float);
template std::pair<std::unique_ptr<Dendrogram<int32_t>>, double> louvain(
raft::handle_t const&,
graph_view_t<int32_t, int32_t, false, true> const&,
std::optional<edge_property_view_t<int32_t, double const*>>,
size_t,
double,
double);
template std::pair<std::unique_ptr<Dendrogram<int32_t>>, double> louvain(
raft::handle_t const&,
graph_view_t<int32_t, int64_t, false, true> const&,
std::optional<edge_property_view_t<int64_t, double const*>>,
size_t,
double,
double);
template std::pair<std::unique_ptr<Dendrogram<int64_t>>, double> louvain(
raft::handle_t const&,
graph_view_t<int64_t, int64_t, false, true> const&,
std::optional<edge_property_view_t<int64_t, double const*>>,
size_t,
double,
double);

template std::pair<size_t, float> louvain(
Expand All @@ -63,41 +69,47 @@ template std::pair<size_t, float> louvain(
std::optional<edge_property_view_t<int32_t, float const*>>,
int32_t*,
size_t,
float,
float);
template std::pair<size_t, double> louvain(
raft::handle_t const&,
graph_view_t<int32_t, int32_t, false, true> const&,
std::optional<edge_property_view_t<int32_t, double const*>>,
int32_t*,
size_t,
double,
double);
template std::pair<size_t, float> louvain(
raft::handle_t const&,
graph_view_t<int32_t, int64_t, false, true> const&,
std::optional<edge_property_view_t<int64_t, float const*>>,
int32_t*,
size_t,
float,
float);
template std::pair<size_t, double> louvain(
raft::handle_t const&,
graph_view_t<int32_t, int64_t, false, true> const&,
std::optional<edge_property_view_t<int64_t, double const*>>,
int32_t*,
size_t,
double,
double);
template std::pair<size_t, float> louvain(
raft::handle_t const&,
graph_view_t<int64_t, int64_t, false, true> const&,
std::optional<edge_property_view_t<int64_t, float const*>>,
int64_t*,
size_t,
float,
float);
template std::pair<size_t, double> louvain(
raft::handle_t const&,
graph_view_t<int64_t, int64_t, false, true> const&,
std::optional<edge_property_view_t<int64_t, double const*>>,
int64_t*,
size_t,
double,
double);

} // namespace cugraph
14 changes: 13 additions & 1 deletion cpp/src/community/louvain_sg.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -25,36 +25,42 @@ template std::pair<std::unique_ptr<Dendrogram<int32_t>>, float> louvain(
graph_view_t<int32_t, int32_t, false, false> const&,
std::optional<edge_property_view_t<int32_t, float const*>>,
size_t,
float,
float);
template std::pair<std::unique_ptr<Dendrogram<int32_t>>, float> louvain(
raft::handle_t const&,
graph_view_t<int32_t, int64_t, false, false> const&,
std::optional<edge_property_view_t<int64_t, float const*>>,
size_t,
float,
float);
template std::pair<std::unique_ptr<Dendrogram<int64_t>>, float> louvain(
raft::handle_t const&,
graph_view_t<int64_t, int64_t, false, false> const&,
std::optional<edge_property_view_t<int64_t, float const*>>,
size_t,
float,
float);
template std::pair<std::unique_ptr<Dendrogram<int32_t>>, double> louvain(
raft::handle_t const&,
graph_view_t<int32_t, int32_t, false, false> const&,
std::optional<edge_property_view_t<int32_t, double const*>>,
size_t,
double,
double);
template std::pair<std::unique_ptr<Dendrogram<int32_t>>, double> louvain(
raft::handle_t const&,
graph_view_t<int32_t, int64_t, false, false> const&,
std::optional<edge_property_view_t<int64_t, double const*>>,
size_t,
double,
double);
template std::pair<std::unique_ptr<Dendrogram<int64_t>>, double> louvain(
raft::handle_t const&,
graph_view_t<int64_t, int64_t, false, false> const&,
std::optional<edge_property_view_t<int64_t, double const*>>,
size_t,
double,
double);

template std::pair<size_t, float> louvain(
Expand All @@ -63,41 +69,47 @@ template std::pair<size_t, float> louvain(
std::optional<edge_property_view_t<int32_t, float const*>>,
int32_t*,
size_t,
float,
float);
template std::pair<size_t, double> louvain(
raft::handle_t const&,
graph_view_t<int32_t, int32_t, false, false> const&,
std::optional<edge_property_view_t<int32_t, double const*>>,
int32_t*,
size_t,
double,
double);
template std::pair<size_t, float> louvain(
raft::handle_t const&,
graph_view_t<int32_t, int64_t, false, false> const&,
std::optional<edge_property_view_t<int64_t, float const*>>,
int32_t*,
size_t,
float,
float);
template std::pair<size_t, double> louvain(
raft::handle_t const&,
graph_view_t<int32_t, int64_t, false, false> const&,
std::optional<edge_property_view_t<int64_t, double const*>>,
int32_t*,
size_t,
double,
double);
template std::pair<size_t, float> louvain(
raft::handle_t const&,
graph_view_t<int64_t, int64_t, false, false> const&,
std::optional<edge_property_view_t<int64_t, float const*>>,
int64_t*,
size_t,
float,
float);
template std::pair<size_t, double> louvain(
raft::handle_t const&,
graph_view_t<int64_t, int64_t, false, false> const&,
std::optional<edge_property_view_t<int64_t, double const*>>,
int64_t*,
size_t,
double,
double);

} // namespace cugraph
7 changes: 6 additions & 1 deletion cpp/tests/c_api/louvain_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ int generic_louvain_test(vertex_t* h_src,
size_t num_vertices,
size_t num_edges,
size_t max_level,
double threshold,
double resolution,
bool_t store_transposed)
{
Expand Down Expand Up @@ -60,7 +61,7 @@ int generic_louvain_test(vertex_t* h_src,
TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));

ret_code =
cugraph_louvain(p_handle, p_graph, max_level, resolution, FALSE, &p_result, &ret_error);
cugraph_louvain(p_handle, p_graph, max_level, threshold, resolution, FALSE, &p_result, &ret_error);

TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "cugraph_louvain failed.");
Expand Down Expand Up @@ -108,6 +109,7 @@ int test_louvain()
size_t num_edges = 16;
size_t num_vertices = 6;
size_t max_level = 10;
weight_t threshold = 1e-7;
weight_t resolution = 1.0;

vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5};
Expand All @@ -126,6 +128,7 @@ int test_louvain()
num_vertices,
num_edges,
max_level,
threshold,
resolution,
FALSE);
}
Expand All @@ -135,6 +138,7 @@ int test_louvain_no_weight()
size_t num_edges = 16;
size_t num_vertices = 6;
size_t max_level = 10;
weight_t threshold = 1e-7;
weight_t resolution = 1.0;

vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5};
Expand All @@ -151,6 +155,7 @@ int test_louvain_no_weight()
num_vertices,
num_edges,
max_level,
threshold,
resolution,
FALSE);
}
Expand Down
Loading

0 comments on commit 98324ac

Please sign in to comment.