From 78b380ec7f4fbffaa99b090b193218c02d2dd066 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Fri, 17 May 2024 15:47:49 -0400 Subject: [PATCH] Expose graph_view method to count multi edges through C API and PLC (#4426) The Python property graph class makes some expensive dask calls to determine if a graph is a multi-graph. This PR exposes to the C API and the PLC layer the graph_view method that counts the number of multi-edges in the graph. Specifically, the function returns the number of extra edges that exist. So if the edge `(u,v)` exists `k` times, that edge contributes `k-1` to the return value. If the return value of this function is greater than 0 then the graph is a multi-graph. If the return value is 0 then, while the graph might be labeled a multi-graph, there are no multi-edges, so it could be treated as a regular (non-multi) graph. Closes #2417 Note that the property graph Python code would still need to be modified to use this function. Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Joseph Nke (https://github.com/jnke2016) - Alex Barghi (https://github.com/alexbarghi-nv) URL: https://github.com/rapidsai/cugraph/pull/4426 --- cpp/include/cugraph_c/graph_functions.h | 20 +++ cpp/src/c_api/graph_functions.cpp | 49 ++++++++ cpp/tests/CMakeLists.txt | 2 + cpp/tests/c_api/count_multi_edges_test.c | 114 +++++++++++++++++ cpp/tests/c_api/mg_count_multi_edges_test.c | 118 ++++++++++++++++++ .../_cugraph_c/graph_functions.pxd | 11 ++ .../pylibcugraph/count_multi_edges.pyx | 96 ++++++++++++++ 7 files changed, 410 insertions(+) create mode 100644 cpp/tests/c_api/count_multi_edges_test.c create mode 100644 cpp/tests/c_api/mg_count_multi_edges_test.c create mode 100644 python/pylibcugraph/pylibcugraph/count_multi_edges.pyx diff --git a/cpp/include/cugraph_c/graph_functions.h b/cpp/include/cugraph_c/graph_functions.h index 94b06189796..ff7e439232a 100644 --- 
a/cpp/include/cugraph_c/graph_functions.h
+++ b/cpp/include/cugraph_c/graph_functions.h
@@ -229,6 +229,26 @@ cugraph_error_code_t cugraph_allgather(const cugraph_resource_handle_t* handle,
                                        cugraph_induced_subgraph_result_t** result,
                                        cugraph_error_t** error);
 
+/**
+ * @brief     Count multi-edges
+ *
+ * Count the extra copies of repeated edges: an edge that appears k times contributes k - 1.
+ *
+ * @param [in]  handle              Handle for accessing resources.
+ * @param [in]  graph               Pointer to graph
+ * @param [in]  do_expensive_check  A flag to run expensive checks for input arguments (if set to
+ * true)
+ * @param [out] result              Where to store the count of multi-edges (0 means none found)
+ * @param [out] error               Pointer to an error object storing details of any error.  Will
+ *                                  be populated if error code is not CUGRAPH_SUCCESS
+ * @return error code
+ */
+cugraph_error_code_t cugraph_count_multi_edges(const cugraph_resource_handle_t* handle,
+                                               cugraph_graph_t* graph,
+                                               bool_t do_expensive_check,
+                                               size_t* result,
+                                               cugraph_error_t** error);
+
 /**
  * @brief Opaque degree result type
  */
diff --git a/cpp/src/c_api/graph_functions.cpp b/cpp/src/c_api/graph_functions.cpp
index 35f7086d726..91371b988b3 100644
--- a/cpp/src/c_api/graph_functions.cpp
+++ b/cpp/src/c_api/graph_functions.cpp
@@ -214,6 +214,44 @@ struct two_hop_neighbors_functor : public cugraph::c_api::abstract_functor {
   }
 };
 
+struct count_multi_edges_functor : public cugraph::c_api::abstract_functor {  // leaves the count in result_
+  raft::handle_t const& handle_{};
+  cugraph::c_api::cugraph_graph_t* graph_{nullptr};
+  size_t result_{};
+  bool do_expensive_check_{false};  // stored by the constructor; operator()() below never reads it
+
+  count_multi_edges_functor(::cugraph_resource_handle_t const* handle,
+                            ::cugraph_graph_t* graph,
+                            bool do_expensive_check)
+    : abstract_functor(),
+      handle_(*reinterpret_cast(handle)->handle_),
+      graph_(reinterpret_cast(graph)),
+      do_expensive_check_(do_expensive_check)
+  {
+  }
+
+  template  // NOTE(review): the template parameter list here and the <...> arguments of the casts and is_candidate in this struct look stripped in this copy - restore before applying
+  void operator()()
+  {
+    if constexpr (!cugraph::is_candidate::value) {
+      unsupported();  // inherited helper, not shown here; presumably records an "unsupported types" error - confirm
+    } else {
+      auto graph =
+        reinterpret_cast*>(
+          graph_->graph_);
+
+      auto graph_view = graph->view();
+
+      result_ = static_cast(graph_view.count_multi_edges(handle_));  // only the handle is passed on
+    }
+  }
+};
+
 }  // namespace
 
 extern "C" cugraph_error_code_t cugraph_create_vertex_pairs(
@@ -281,3 +319,14 @@ extern "C" cugraph_error_code_t cugraph_two_hop_neighbors(
 
   return cugraph::c_api::run_algorithm(graph, functor, result, error);
 }
+
+extern "C" cugraph_error_code_t cugraph_count_multi_edges(const cugraph_resource_handle_t* handle,
+                                                          cugraph_graph_t* graph,
+                                                          bool_t do_expensive_check,
+                                                          size_t* result,
+                                                          cugraph_error_t** error)
+{
+  count_multi_edges_functor functor(handle, graph, do_expensive_check);  // per-call state handed to run_algorithm()
+
+  return cugraph::c_api::run_algorithm(graph, functor, result, error);
+}
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index fd08758009e..19097add541 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -733,6 +733,7 @@ if(BUILD_CUGRAPH_MG_TESTS)
   ConfigureCTestMG(MG_CAPI_K_CORE_TEST c_api/mg_k_core_test.c)
   ConfigureCTestMG(MG_CAPI_INDUCED_SUBGRAPH_TEST c_api/mg_induced_subgraph_test.c)
   ConfigureCTestMG(MG_CAPI_DEGREES c_api/mg_degrees_test.c)
+  ConfigureCTestMG(MG_CAPI_COUNT_MULTI_EDGES c_api/mg_count_multi_edges_test.c)
   ConfigureCTestMG(MG_CAPI_EGONET_TEST c_api/mg_egonet_test.c)
   ConfigureCTestMG(MG_CAPI_TWO_HOP_NEIGHBORS_TEST c_api/mg_two_hop_neighbors_test.c)
 
@@ -777,6 +778,7 @@ ConfigureCTest(CAPI_SIMILARITY_TEST c_api/similarity_test.c)
 ConfigureCTest(CAPI_K_CORE_TEST c_api/k_core_test.c)
 ConfigureCTest(CAPI_INDUCED_SUBGRAPH_TEST c_api/induced_subgraph_test.c)
 ConfigureCTest(CAPI_DEGREES c_api/degrees_test.c)
+ConfigureCTest(CAPI_COUNT_MULTI_EDGES c_api/count_multi_edges_test.c)
 ConfigureCTest(CAPI_EGONET_TEST c_api/egonet_test.c)
 ConfigureCTest(CAPI_TWO_HOP_NEIGHBORS_TEST c_api/two_hop_neighbors_test.c)
 ConfigureCTest(CAPI_K_TRUSS_TEST c_api/k_truss_test.c)
diff --git a/cpp/tests/c_api/count_multi_edges_test.c b/cpp/tests/c_api/count_multi_edges_test.c
new file mode 100644
index 00000000000..222cf12ea36
--- 
/dev/null
+++ b/cpp/tests/c_api/count_multi_edges_test.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "c_test_utils.h" /* RUN_TEST */
+
+#include /* NOTE(review): the header names of these three #include lines are missing in this copy */
+#include
+
+#include
+
+typedef int32_t vertex_t;
+typedef int32_t edge_t;
+typedef float weight_t;
+
+data_type_id_t vertex_tid    = INT32;
+data_type_id_t edge_tid      = INT32;
+data_type_id_t weight_tid    = FLOAT32;
+data_type_id_t edge_id_tid   = INT32;
+data_type_id_t edge_type_tid = INT32;
+
+/*
+ * Create an SG graph from the host edge list, count its multi-edges, compare with multi_edges_count
+ */
+int generic_count_multi_edges_test(vertex_t* h_src,
+                                   vertex_t* h_dst,
+                                   weight_t* h_wgt,
+                                   size_t num_vertices,
+                                   size_t num_edges,
+                                   bool_t store_transposed,
+                                   bool_t is_symmetric,
+                                   bool_t is_multigraph,
+                                   size_t multi_edges_count)
+{
+  int test_ret_value = 0;
+
+  cugraph_error_code_t ret_code = CUGRAPH_SUCCESS;
+  cugraph_error_t* ret_error    = NULL; /* NULL so cugraph_error_free() below never sees garbage */
+
+  cugraph_resource_handle_t* handle = NULL;
+  cugraph_graph_t* graph            = NULL;
+  size_t result                     = 0;
+
+  handle = cugraph_create_resource_handle(NULL);
+  TEST_ASSERT(test_ret_value, handle != NULL, "resource handle creation failed.");
+
+  ret_code = create_sg_test_graph(handle,
+                                  vertex_tid,
+                                  edge_tid,
+                                  h_src,
+                                  h_dst,
+                                  weight_tid,
+                                  h_wgt,
+                                  edge_type_tid,
+                                  NULL,
+                                  edge_id_tid,
+                                  NULL,
+                                  num_edges,
+                                  store_transposed,
+                                  FALSE,
+                                  is_symmetric,
+                                  is_multigraph,
+                                  &graph,
+                                  &ret_error);
+
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed.");
+  TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+
+  ret_code = cugraph_count_multi_edges(handle, graph, FALSE, &result, &ret_error);
+
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_count_multi_edges failed.");
+
+  TEST_ASSERT(test_ret_value, result == multi_edges_count, "multi-edge count did not match");
+
+  cugraph_graph_free(graph);
+  cugraph_error_free(ret_error);
+  cugraph_free_resource_handle(handle); /* pairs with cugraph_create_resource_handle() above */
+  return test_ret_value;
+}
+
+int test_multi_edges_count()
+{
+  size_t num_edges    = 14;
+  size_t num_vertices = 6;
+
+  vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 0, 1, 1, 3, 0, 1};
+  vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 1, 3, 0, 1, 1, 0};
+  weight_t h_wgt[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+  size_t multi_edge_count = 4; /* (0,1) appears 3x -> 2, (1,3) 2x -> 1, (1,0) 2x -> 1 */
+
+  return generic_count_multi_edges_test(
+    h_src, h_dst, h_wgt, num_vertices, num_edges, TRUE, TRUE, TRUE, multi_edge_count);
+}
+
+/******************************************************************************/
+
+int main(int argc, char** argv)
+{
+  int result = 0;
+  result |= RUN_TEST(test_multi_edges_count);
+  return result;
+}
diff --git a/cpp/tests/c_api/mg_count_multi_edges_test.c b/cpp/tests/c_api/mg_count_multi_edges_test.c
new file mode 100644
index 00000000000..69eaaff40dc
--- /dev/null
+++ b/cpp/tests/c_api/mg_count_multi_edges_test.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mg_test_utils.h" /* RUN_TEST */
+
+#include /* NOTE(review): the header names of these three #include lines are missing in this copy */
+#include
+
+#include
+
+typedef int32_t vertex_t;
+typedef int32_t edge_t;
+typedef float weight_t;
+
+data_type_id_t vertex_tid    = INT32;
+data_type_id_t edge_tid      = INT32;
+data_type_id_t weight_tid    = FLOAT32;
+data_type_id_t edge_id_tid   = INT32;
+data_type_id_t edge_type_tid = INT32;
+
+/*
+ * Create an MG graph from the host edge list, count its multi-edges, compare with multi_edges_count
+ */
+int generic_count_multi_edges_test(const cugraph_resource_handle_t* handle,
+                                   vertex_t* h_src,
+                                   vertex_t* h_dst,
+                                   weight_t* h_wgt,
+                                   size_t num_vertices,
+                                   size_t num_edges,
+                                   bool_t store_transposed,
+                                   bool_t is_symmetric,
+                                   bool_t is_multigraph,
+                                   size_t multi_edges_count)
+{
+  int test_ret_value = 0;
+
+  cugraph_error_code_t ret_code = CUGRAPH_SUCCESS;
+  cugraph_error_t* ret_error    = NULL; /* NULL so cugraph_error_free() below never sees garbage */
+
+  cugraph_graph_t* graph = NULL;
+  size_t result          = 0;
+
+  ret_code = create_mg_test_graph_new(handle,
+                                      vertex_tid,
+                                      edge_tid,
+                                      h_src,
+                                      h_dst,
+                                      weight_tid,
+                                      h_wgt,
+                                      edge_type_tid,
+                                      NULL,
+                                      edge_id_tid,
+                                      NULL,
+                                      num_edges,
+                                      store_transposed,
+                                      FALSE,
+                                      is_symmetric,
+                                      is_multigraph,
+                                      &graph,
+                                      &ret_error);
+
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed.");
+  TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+
+  ret_code = cugraph_count_multi_edges(handle, graph, FALSE, &result, &ret_error);
+
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_count_multi_edges failed.");
+
+  TEST_ASSERT(test_ret_value, result == multi_edges_count, "multi-edge count did not match");
+
+  cugraph_graph_free(graph);
+  cugraph_error_free(ret_error);
+
+  return test_ret_value; /* the handle is owned and released by main() */
+}
+
+int test_multi_edges_count(const cugraph_resource_handle_t* handle)
+{
+  size_t num_edges    = 14;
+  size_t num_vertices = 6;
+
+  vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 0, 1, 1, 3, 0, 1};
+  vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 1, 3, 0, 1, 1, 0};
+  weight_t h_wgt[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+  size_t multi_edge_count = 4; /* (0,1) appears 3x -> 2, (1,3) 2x -> 1, (1,0) 2x -> 1 */
+
+  return generic_count_multi_edges_test(
+    handle, h_src, h_dst, h_wgt, num_vertices, num_edges, TRUE, TRUE, TRUE, multi_edge_count);
+}
+
+/******************************************************************************/
+
+int main(int argc, char** argv)
+{
+  void* raft_handle                 = create_mg_raft_handle(argc, argv);
+  cugraph_resource_handle_t* handle = cugraph_create_resource_handle(raft_handle);
+
+  int result = 0;
+  result |= RUN_MG_TEST(test_multi_edges_count, handle);
+
+  cugraph_free_resource_handle(handle);
+  free_mg_raft_handle(raft_handle);
+
+  return result;
+}
diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd
index 6f1ac1f640b..315c9bd7503 100644
--- a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd
+++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd
@@ -183,6 +183,17 @@ cdef extern from "cugraph_c/graph_functions.h":
             cugraph_error_t** error
         )
 
+    ###########################################################################
+    # count multi-edges (parameter names mirror cugraph_c/graph_functions.h)
+    cdef cugraph_error_code_t \
+        cugraph_count_multi_edges(
+            const cugraph_resource_handle_t *handle,
+            cugraph_graph_t* graph,
+            bool_t do_expensive_check,
+            size_t *result,
+            cugraph_error_t** error
+        )
+
     ###########################################################################
     # degrees
     ctypedef struct cugraph_degrees_result_t:
diff --git a/python/pylibcugraph/pylibcugraph/count_multi_edges.pyx b/python/pylibcugraph/pylibcugraph/count_multi_edges.pyx
new file mode 100644
index 00000000000..d3780e53283
--- /dev/null
+++ b/python/pylibcugraph/pylibcugraph/count_multi_edges.pyx
@@ -0,0 +1,96 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Have cython use python 3 syntax
+# cython: language_level = 3
+
+from pylibcugraph._cugraph_c.resource_handle cimport (
+    bool_t,
+    data_type_id_t,
+    cugraph_resource_handle_t,
+)
+from pylibcugraph._cugraph_c.error cimport (
+    cugraph_error_code_t,
+    cugraph_error_t,
+)
+from pylibcugraph._cugraph_c.graph cimport (
+    cugraph_graph_t,
+)
+from pylibcugraph._cugraph_c.graph_functions cimport (
+    cugraph_count_multi_edges,
+)
+from pylibcugraph.resource_handle cimport (
+    ResourceHandle,
+)
+from pylibcugraph.graphs cimport (
+    _GPUGraph,
+)
+from pylibcugraph.utils cimport assert_success
+
+
+def count_multi_edges(ResourceHandle resource_handle,
+                      _GPUGraph graph,
+                      bool_t do_expensive_check):
+    """
+    Count the number of multi-edges in the graph.  This returns
+    the number of duplicates: if the edge (u, v) appears k times
+    in the graph, then that edge contributes (k-1) toward the
+    total number of duplicates.
+
+    Parameters
+    ----------
+    resource_handle : ResourceHandle
+        Handle to the underlying device resources needed for referencing data
+        and running algorithms.
+
+    graph : SGGraph or MGGraph
+        The input graph, for either Single or Multi-GPU operations.
+
+    do_expensive_check : bool_t
+        A flag to run expensive checks for input arguments if True.
+
+    Returns
+    -------
+    Total count of duplicate edges in the graph, as a Python int (0 when there are none)
+
+    Examples
+    --------
+    >>> import pylibcugraph, cupy, numpy
+    >>> srcs = cupy.asarray([0, 0, 0], dtype=numpy.int32)
+    >>> dsts = cupy.asarray([1, 1, 1], dtype=numpy.int32)
+    >>> weights = cupy.asarray([1.0, 1.0, 1.0], dtype=numpy.float32)
+    >>> resource_handle = pylibcugraph.ResourceHandle()
+    >>> graph_props = pylibcugraph.GraphProperties(
+    ...     is_symmetric=False, is_multigraph=True)
+    >>> G = pylibcugraph.SGGraph(
+    ...     resource_handle, graph_props, srcs, dsts, weight_array=weights,
+    ...     store_transposed=True, renumber=False, do_expensive_check=False)
+    >>> count = pylibcugraph.count_multi_edges(resource_handle, G, False)
+    """
+
+    cdef cugraph_resource_handle_t* c_resource_handle_ptr = \
+        resource_handle.c_resource_handle_ptr
+    cdef cugraph_graph_t* c_graph_ptr = graph.c_graph_ptr
+
+    cdef size_t result
+    cdef cugraph_error_code_t error_code
+    cdef cugraph_error_t* error_ptr
+
+    error_code = cugraph_count_multi_edges(c_resource_handle_ptr,
+                                           c_graph_ptr,
+                                           do_expensive_check,
+                                           &result,
+                                           &error_ptr)
+    assert_success(error_code, error_ptr, "cugraph_count_multi_edges")
+
+    return result