rapidsai · rapids-bot · May 23, 2023 · Apr 25, 2023 · Apr 26, 2023 · May 5, 2023
@@ -697,7 +697,9 @@ void flatten_dendrogram(raft::handle_t const& handle,
  * Supported value : int (signed, 32-bit)
  * @tparam weight_t Type of edge weights. Supported values : float or double.
  *
- * @param[in] handle Library handle (RAFT). If a communicator is set in the handle,
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param rng_state The RngState instance holding pseudo-random number generator state.
  * @param graph_view Graph view object.
  * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p
  * edge_weight_view.has_value() == false, edge weights are assumed to be 1.0.
@@ -707,6 +709,10 @@ void flatten_dendrogram(raft::handle_t const& handle,
  * of the communities. Higher resolutions lead to more smaller
  * communities, lower resolutions lead to fewer larger
  * communities. (default 1)
+ * @param[in] theta (optional) The value of the parameter to scale modularity
+ * gain in Leiden refinement phase. It is used to compute
+ * the probability of joining a random leiden community.
+ * Called theta in the Leiden algorithm. (default 1)
  *
  * @return a pair containing:
  * 1) unique pointer to dendrogram
@@ -716,10 +722,12 @@ void flatten_dendrogram(raft::handle_t const& handle,
 template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
 std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
  raft::handle_t const& handle,
+ raft::random::RngState& rng_state,
  graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
  std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
  size_t max_level = 100,
- weight_t resolution = weight_t{1});
+ weight_t resolution = weight_t{1},
+ weight_t theta = weight_t{1});
 
 /**
  * @brief Leiden implementation
@@ -741,7 +749,9 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
  * Supported value : int (signed, 32-bit)
  * @tparam weight_t Type of edge weights. Supported values : float or double.
  *
- * @param[in] handle Library handle (RAFT). If a communicator is set in the handle,
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param rng_state The RngState instance holding pseudo-random number generator state.
  * @param graph_view Graph view object.
  * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p
  * edge_weight_view.has_value() == false, edge weights are assumed to be 1.0.
@@ -751,6 +761,11 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
  * of the communities. Higher resolutions lead to more smaller
  * communities, lower resolutions lead to fewer larger
  * communities. (default 1)
+ * @param[in] theta (optional) The value of the parameter to scale modularity
+ * gain in Leiden refinement phase. It is used to compute
+ * the probability of joining a random leiden community.
+ * Called theta in the Leiden algorithm.
+ * (default 1)
  *
  * @return a pair containing:
  * 1) number of levels of the returned clustering
@@ -759,11 +774,13 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
 template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
 std::pair<size_t, weight_t> leiden(
  raft::handle_t const& handle,
+ raft::random::RngState& rng_state,
  graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
  std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
  vertex_t* clustering, // FIXME: Use (device_)span instead
  size_t max_level = 100,
- weight_t resolution = weight_t{1});
+ weight_t resolution = weight_t{1},
+ weight_t theta = weight_t{1});
 
 /**
  * @brief Computes the ecg clustering of the given graph.
@@ -1992,6 +2009,24 @@ std::tuple<rmm::device_uvector<size_t>, rmm::device_uvector<vertex_t>> k_hop_nbr
  size_t k,
  bool do_expensive_check = false);
 
+/**
+ * @brief Find a Maximal Independent Set
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object.
+ * @param rng_state The RngState instance holding pseudo-random number generator state.
+ * @return A device vector containing vertices found in the maximal independent set
+ */
+
+template <typename vertex_t, typename edge_t, bool multi_gpu>
+rmm::device_uvector<vertex_t> maximal_independent_set(
+ raft::handle_t const& handle,
+ graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
+ raft::random::RngState& rng_state);
+
 } // namespace cugraph
 
 /**

@@ -19,6 +19,7 @@
 #include <cugraph_c/error.h>
 #include <cugraph_c/graph.h>
 #include <cugraph_c/graph_functions.h>
+#include <cugraph_c/random.h>
 #include <cugraph_c/resource_handle.h>
 
 /** @defgroup community Community algorithms
@@ -130,6 +131,7 @@ cugraph_error_code_t cugraph_louvain(const cugraph_resource_handle_t* handle,
  * @return error code
  */
 cugraph_error_code_t cugraph_leiden(const cugraph_resource_handle_t* handle,
+ cugraph_rng_state_t* rng_state,
  cugraph_graph_t* graph,
  size_t max_level,
  double resolution,

@@ -20,6 +20,7 @@
 #include <c_api/graph.hpp>
 #include <c_api/graph_helper.hpp>
 #include <c_api/hierarchical_clustering_result.hpp>
+#include <c_api/random.hpp>
 #include <c_api/resource_handle.hpp>
 #include <c_api/utils.hpp>
 
@@ -28,25 +29,30 @@
 #include <cugraph/detail/utility_wrappers.hpp>
 #include <cugraph/graph_functions.hpp>
 
+#include <raft/core/handle.hpp>
+
 #include <optional>
 
 namespace {
 
 struct leiden_functor : public cugraph::c_api::abstract_functor {
  raft::handle_t const& handle_;
- cugraph::c_api::cugraph_graph_t* graph_;
+ cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr};
+ cugraph::c_api::cugraph_graph_t* graph_{nullptr};
  size_t max_level_;
  double resolution_;
  bool do_expensive_check_;
  cugraph::c_api::cugraph_hierarchical_clustering_result_t* result_{};
 
  leiden_functor(::cugraph_resource_handle_t const* handle,
+ cugraph_rng_state_t* rng_state,
  ::cugraph_graph_t* graph,
  size_t max_level,
  double resolution,
  bool do_expensive_check)
  : abstract_functor(),
  handle_(*reinterpret_cast<cugraph::c_api::cugraph_resource_handle_t const*>(handle)->handle_),
+ rng_state_(reinterpret_cast<cugraph::c_api::cugraph_rng_state_t*>(rng_state)),
  graph_(reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)),
  max_level_(max_level),
  resolution_(resolution),
@@ -64,10 +70,6 @@ struct leiden_functor : public cugraph::c_api::abstract_functor {
  {
  if constexpr (!cugraph::is_candidate<vertex_t, edge_t, weight_t>::value) {
  unsupported();
- } else if constexpr (multi_gpu) {
- error_code_ = CUGRAPH_NOT_IMPLEMENTED;
- error_->error_message_ = "leiden not currently implemented for multi-GPU";
-
  } else {
  // leiden expects store_transposed == false
  if constexpr (store_transposed) {
@@ -98,6 +100,7 @@ struct leiden_functor : public cugraph::c_api::abstract_functor {
  // coarsened graphs.
  auto [level, modularity] =
  cugraph::leiden(handle_,
+ rng_state_->rng_state_,
  graph_view,
  (edge_weights != nullptr)
  ? std::make_optional(edge_weights->view())
@@ -123,14 +126,15 @@ struct leiden_functor : public cugraph::c_api::abstract_functor {
 } // namespace
 
 extern "C" cugraph_error_code_t cugraph_leiden(const cugraph_resource_handle_t* handle,
+ cugraph_rng_state_t* rng_state,
  cugraph_graph_t* graph,
  size_t max_level,
  double resolution,
  bool_t do_expensive_check,
  cugraph_hierarchical_clustering_result_t** result,
  cugraph_error_t** error)
 {
- leiden_functor functor(handle, graph, max_level, resolution, do_expensive_check);
+ leiden_functor functor(handle, rng_state, graph, max_level, resolution, do_expensive_check);
 
  return cugraph::c_api::run_algorithm(graph, functor, result, error);
 }
@@ -16,48 +16,38 @@
  */
 #pragma once
 
+#include <community/mis.hpp>
 #include <prims/fill_edge_src_dst_property.cuh>
 #include <prims/per_v_transform_reduce_incoming_outgoing_e.cuh>
 #include <prims/update_edge_src_dst_property.cuh>
 
-#include <community/detail/mis.hpp>
 #include <cugraph/edge_property.hpp>
 #include <cugraph/edge_src_dst_property.hpp>
 #include <cugraph/graph_functions.hpp>
 #include <cugraph/graph_view.hpp>
 #include <cugraph/utilities/host_scalar_comm.hpp>
 
-#include <raft/util/cudart_utils.hpp>
-#include <raft/util/integer_utils.hpp>
-#include <rmm/exec_policy.hpp>
-
 #include <thrust/count.h>
 #include <thrust/distance.h>
-#include <thrust/execution_policy.h>
-#include <thrust/fill.h>
-#include <thrust/functional.h>
 #include <thrust/iterator/counting_iterator.h>
+#include <thrust/merge.h>
 #include <thrust/optional.h>
 #include <thrust/remove.h>
-#include <thrust/sequence.h>
-#include <thrust/shuffle.h>
+#include <thrust/set_operations.h>
 #include <thrust/transform.h>
 #include <thrust/transform_reduce.h>
 
 #include <cmath>
-#include <numeric>
-#include <type_traits>
-#include <utility>
 
 namespace cugraph {
 
 namespace detail {
 
-template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
-rmm::device_uvector<vertex_t> compute_mis(
+template <typename vertex_t, typename edge_t, bool multi_gpu>
+rmm::device_uvector<vertex_t> maximal_independent_set(
  raft::handle_t const& handle,
  cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
- std::optional<cugraph::edge_property_view_t<edge_t, weight_t const*>> edge_weight_view)
+ raft::random::RngState& rng_state)
 {
  using GraphViewType = cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu>;
 
@@ -88,25 +78,17 @@ rmm::device_uvector<vertex_t> compute_mis(
  thrust::copy(handle.get_thrust_policy(), vertex_begin, vertex_end, ranks.begin());
 
  // Set ranks of zero out-degree vertices to std::numeric_limits<vertex_t>::lowest()
- thrust::for_each(
+ thrust::transform_if(
  handle.get_thrust_policy(),
- vertex_begin,
- vertex_end,
- [out_degrees = raft::device_span<edge_t const>(out_degrees.data(), out_degrees.size()),
- ranks = raft::device_span<vertex_t>(ranks.data(), ranks.size()),
- v_first = graph_view.local_vertex_partition_range_first()] __device__(auto v) {
- auto v_offset = v - v_first;
- if (out_degrees[v_offset] == 0) { ranks[v_offset] = std::numeric_limits<vertex_t>::lowest(); }
- });
+ out_degrees.begin(),
+ out_degrees.end(),
+ ranks.begin(),
+ [] __device__(auto) { return std::numeric_limits<vertex_t>::lowest(); },
+ [] __device__(auto deg) { return deg == 0; });
 
  out_degrees.resize(0, handle.get_stream());
  out_degrees.shrink_to_fit(handle.get_stream());
 
- thrust::default_random_engine g;
- size_t seed = 0;
- if constexpr (multi_gpu) { seed = handle.get_comms().get_rank(); }
- g.seed(seed);
-
  size_t loop_counter = 0;
  while (true) {
  loop_counter++;
@@ -117,22 +99,48 @@ rmm::device_uvector<vertex_t> compute_mis(
  thrust::copy(handle.get_thrust_policy(), ranks.begin(), ranks.end(), temporary_ranks.begin());
 
  // Select a random set of candidate vertices
- // FIXME: use common utility function to select a subset of remaining vertices
- // and for MG extension, select from disributed array remaining vertices
- thrust::shuffle(
- handle.get_thrust_policy(), remaining_vertices.begin(), remaining_vertices.end(), g);
 
- vertex_t nr_candidates =
- (remaining_vertices.size() < 1024)
- ? remaining_vertices.size()
- : std::min(static_cast<vertex_t>((0.50 + 0.25 * loop_counter) * remaining_vertices.size()),
- static_cast<vertex_t>(remaining_vertices.size()));
+ vertex_t nr_remaining_vertices_to_check = remaining_vertices.size();
+ if (multi_gpu) {
+ nr_remaining_vertices_to_check = host_scalar_allreduce(handle.get_comms(),
+ nr_remaining_vertices_to_check,
+ raft::comms::op_t::SUM,
+ handle.get_stream());
+ }
+
+ vertex_t nr_candidates = (nr_remaining_vertices_to_check < 1024)
+ ? nr_remaining_vertices_to_check
+ : std::min(static_cast<vertex_t>((0.50 + 0.25 * loop_counter) *
+ nr_remaining_vertices_to_check),
+ nr_remaining_vertices_to_check);
+
+ // FIXME: Can we improve performance here?
+ // FIXME: if(nr_remaining_vertices_to_check < 1024), may avoid calling select_random_vertices
+ auto d_sampled_vertices =
+ cugraph::select_random_vertices(handle,
+ graph_view,
+ std::make_optional(raft::device_span<vertex_t const>{
+ remaining_vertices.data(), remaining_vertices.size()}),
+ rng_state,
+ nr_candidates,
+ false,
+ true);
+
+ rmm::device_uvector<vertex_t> non_candidate_vertices(
+ remaining_vertices.size() - d_sampled_vertices.size(), handle.get_stream());
+
+ thrust::set_difference(handle.get_thrust_policy(),
+ remaining_vertices.begin(),
+ remaining_vertices.end(),
+ d_sampled_vertices.begin(),
+ d_sampled_vertices.end(),
+ non_candidate_vertices.begin());
 
  // Set temporary ranks of non-candidate vertices to std::numeric_limits<vertex_t>::lowest()
  thrust::for_each(
  handle.get_thrust_policy(),
- remaining_vertices.begin(),
- remaining_vertices.end() - nr_candidates,
+ non_candidate_vertices.begin(),
+ non_candidate_vertices.end(),
  [temporary_ranks =
  raft::device_span<vertex_t>(temporary_ranks.data(), temporary_ranks.size()),
  v_first = graph_view.local_vertex_partition_range_first()] __device__(auto v) {
@@ -160,7 +168,6 @@ rmm::device_uvector<vertex_t> compute_mis(
 
  //
  // Find maximum rank outgoing neighbor for each vertex
- // (In case of Leiden decision graph, each vertex has at most one outgoing edge)
  //
 
  rmm::device_uvector<vertex_t> max_outgoing_ranks(local_vtx_partitoin_size, handle.get_stream());
@@ -224,8 +231,8 @@ rmm::device_uvector<vertex_t> compute_mis(
  //
  auto last = thrust::remove_if(
  handle.get_thrust_policy(),
- remaining_vertices.end() - nr_candidates,
- remaining_vertices.end(),
+ d_sampled_vertices.begin(),
+ d_sampled_vertices.end(),
  [max_rank_neighbor_first = max_outgoing_ranks.begin(),
  ranks = raft::device_span<vertex_t>(ranks.data(), ranks.size()),
  v_first = graph_view.local_vertex_partition_range_first()] __device__(auto v) {
@@ -252,11 +259,23 @@ rmm::device_uvector<vertex_t> compute_mis(
  max_outgoing_ranks.resize(0, handle.get_stream());
  max_outgoing_ranks.shrink_to_fit(handle.get_stream());
 
- remaining_vertices.resize(thrust::distance(remaining_vertices.begin(), last),
+ d_sampled_vertices.resize(thrust::distance(d_sampled_vertices.begin(), last),
+ handle.get_stream());
+ d_sampled_vertices.shrink_to_fit(handle.get_stream());
+
+ remaining_vertices.resize(non_candidate_vertices.size() + d_sampled_vertices.size(),
  handle.get_stream());
  remaining_vertices.shrink_to_fit(handle.get_stream());
 
- vertex_t nr_remaining_vertices_to_check = remaining_vertices.size();
+ // merge non-candidate and remaining candidate vertices
+ thrust::merge(handle.get_thrust_policy(),
+ non_candidate_vertices.begin(),
+ non_candidate_vertices.end(),
+ d_sampled_vertices.begin(),
+ d_sampled_vertices.end(),
+ remaining_vertices.begin());
+
+ nr_remaining_vertices_to_check = remaining_vertices.size();
  if (multi_gpu) {
  nr_remaining_vertices_to_check = host_scalar_allreduce(handle.get_comms(),
  nr_remaining_vertices_to_check,
@@ -289,4 +308,14 @@ rmm::device_uvector<vertex_t> compute_mis(
  return mis;
 }
 } // namespace detail
+
+template <typename vertex_t, typename edge_t, bool multi_gpu>
+rmm::device_uvector<vertex_t> maximal_independent_set(
+ raft::handle_t const& handle,
+ graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
+ raft::random::RngState& rng_state)
+{
+ return detail::maximal_independent_set(handle, graph_view, rng_state);
+}
+
 } // namespace cugraph