Skip to content

Commit

Permalink
MG Implementation K-Truss (#4438)
Browse files Browse the repository at this point in the history
This PR adds an MG implementation of K-Truss leveraging the C API

Authors:
  - Joseph Nke (https://github.com/jnke2016)
  - Ralph Liu (https://github.com/nv-rliu)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Rick Ratzel (https://github.com/rlratzel)
  - Jake Awe (https://github.com/AyodeAwe)

URL: #4438
  • Loading branch information
jnke2016 authored Jul 31, 2024
1 parent c941748 commit 7b81173
Show file tree
Hide file tree
Showing 7 changed files with 238 additions and 88 deletions.
62 changes: 10 additions & 52 deletions python/cugraph/cugraph/community/ktruss_subgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.structure.graph_classes import Graph
from typing import Union

import cudf
from pylibcugraph import k_truss_subgraph as pylibcugraph_k_truss_subgraph
from pylibcugraph import ResourceHandle
from cugraph.structure.graph_classes import Graph
from cugraph.utilities import (
ensure_cugraph_obj_for_nx,
cugraph_to_nx,
)

from pylibcugraph import k_truss_subgraph as pylibcugraph_k_truss_subgraph
from pylibcugraph import ResourceHandle
import warnings

from numba import cuda
import cudf
from cugraph.utilities.utils import import_optional

# FIXME: the networkx.Graph type used in the type annotation for
Expand All @@ -34,37 +31,17 @@
networkx = import_optional("networkx")


# FIXME: special case for ktruss on CUDA 11.4: an 11.4 bug causes ktruss to
# crash in that environment. Allow ktruss to import on non-11.4 systems, but
# raise an exception if ktruss is directly imported on 11.4.
def _ensure_compatible_cuda_version():
    """
    Refuse to run k-truss when the CUDA runtime version is (11, 4).

    The runtime version is queried through ``cuda.runtime.get_version()``.
    If that query raises ``CudaRuntimeAPIError`` the version is treated as
    unknown and the check passes.

    Raises
    ------
    NotImplementedError
        If the reported CUDA runtime version equals ``(11, 4)``.
    """
    blocked_version = (11, 4)

    try:
        detected_version = cuda.runtime.get_version()
    except cuda.cudadrv.runtime.CudaRuntimeAPIError:
        # Placeholder that can never compare equal to the blocked tuple.
        detected_version = "n/a"

    if detected_version != blocked_version:
        return

    ver_string = ".".join(map(str, blocked_version))
    raise NotImplementedError(
        "k_truss is not currently supported in CUDA" f" {ver_string} environments."
    )


def k_truss(
G: Union[Graph, "networkx.Graph"], k: int
) -> Union[Graph, "networkx.Graph"]:
"""
Returns the K-Truss subgraph of a graph for a specific k.
NOTE: this function is currently not available on CUDA 11.4 systems.
The k-truss of a graph is a subgraph where each edge is part of at least
(k−2) triangles. K-trusses are used for finding tightly knit groups of
vertices in a graph. A k-truss is a relaxation of a k-clique in the graph
and was defined in [1]. Finding cliques is computationally demanding and
finding the maximal k-clique is known to be NP-Hard.
The k-truss of a graph is a subgraph where each edge is incident to at
least (k−2) triangles. K-trusses are used for finding tightly knit groups
of vertices in a graph. A k-truss is a relaxation of a k-clique in the graph.
Finding cliques is computationally demanding and finding the maximal
k-clique is known to be NP-Hard.
Parameters
----------
Expand All @@ -89,9 +66,6 @@ def k_truss(
>>> k_subgraph = cugraph.k_truss(G, 3)
"""

_ensure_compatible_cuda_version()

G, isNx = ensure_cugraph_obj_for_nx(G)

if isNx is True:
Expand Down Expand Up @@ -159,12 +133,6 @@ def ktruss_subgraph(
k : int
The desired k to be used for extracting the k-truss subgraph.
use_weights : bool, optional (default=True)
Whether the output should contain the edge weights if G has them.
Deprecated: If 'weights' were passed at the graph creation, they will
be used.
Returns
-------
G_truss : cuGraph.Graph
Expand All @@ -177,20 +145,10 @@ def ktruss_subgraph(
>>> k_subgraph = cugraph.ktruss_subgraph(G, 3, use_weights=False)
"""

_ensure_compatible_cuda_version()

KTrussSubgraph = Graph()
if G.is_directed():
raise ValueError("input graph must be undirected")

if use_weights:
warning_msg = (
"The use_weights flag is deprecated "
"and will be removed in the next release. if weights "
"were passed at the graph creation, they will be used."
)
warnings.warn(warning_msg, FutureWarning)

sources, destinations, edge_weights, _ = pylibcugraph_k_truss_subgraph(
resource_handle=ResourceHandle(),
graph=G._plc_graph,
Expand Down
1 change: 1 addition & 0 deletions python/cugraph/cugraph/dask/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from .community.triangle_count import triangle_count
from .community.egonet import ego_graph
from .community.induced_subgraph import induced_subgraph
from .community.ktruss_subgraph import ktruss_subgraph
from .centrality.katz_centrality import katz_centrality
from .components.connectivity import weakly_connected_components
from .sampling.uniform_neighbor_sample import uniform_neighbor_sample
Expand Down
3 changes: 2 additions & 1 deletion python/cugraph/cugraph/dask/community/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -15,3 +15,4 @@
from .triangle_count import triangle_count
from .induced_subgraph import induced_subgraph
from .leiden import leiden
from .ktruss_subgraph import ktruss_subgraph
119 changes: 119 additions & 0 deletions python/cugraph/cugraph/dask/community/ktruss_subgraph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Tuple

import cudf
import cupy as cp
from dask.distributed import wait, default_client
import dask_cudf

from pylibcugraph import (
ResourceHandle,
k_truss_subgraph as pylibcugraph_k_truss_subgraph,
)
import cugraph.dask.comms.comms as Comms


def _call_k_truss_subgraph(
    sID: bytes,
    mg_graph_x,
    k: int,
    do_expensive_check: bool,
) -> Tuple[cp.ndarray, cp.ndarray, cp.ndarray, cp.ndarray]:
    """
    Run the pylibcugraph k-truss extraction on one worker's graph partition.

    Parameters
    ----------
    sID : bytes
        Comms session id; used to look up the handle that is wrapped in the
        ``ResourceHandle`` passed to pylibcugraph.
    mg_graph_x
        The graph object for this worker, forwarded as ``graph``.
    k : int
        The desired k to be used for extracting the k-truss subgraph.
    do_expensive_check : bool
        Forwarded unchanged to pylibcugraph.

    Returns
    -------
    tuple
        Whatever ``pylibcugraph.k_truss_subgraph`` returns, unmodified.
        Callers unpack it as four values: sources, destinations, edge
        weights and a fourth entry they ignore (presumably subgraph
        offsets -- confirm against the pylibcugraph documentation).
    """
    return pylibcugraph_k_truss_subgraph(
        resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
        graph=mg_graph_x,
        k=k,
        do_expensive_check=do_expensive_check,
    )


def convert_to_cudf(
    cp_arrays: Tuple[cp.ndarray, cp.ndarray, cp.ndarray, cp.ndarray]
) -> cudf.DataFrame:
    """
    Assemble one worker's k-truss result into a cudf DataFrame.

    Parameters
    ----------
    cp_arrays : tuple
        A 4-element sequence unpacked as (sources, destinations, weights,
        ignored). The fourth element is discarded.

    Returns
    -------
    cudf.DataFrame
        Columns "src" and "dst" are present when the sources entry is not
        None; column "weight" is present when the weights entry is not
        None. If both are None the frame has no columns.
    """
    cp_src, cp_dst, cp_weight, _ = cp_arrays

    df = cudf.DataFrame()
    if cp_src is not None:
        df["src"] = cp_src
        df["dst"] = cp_dst
    if cp_weight is not None:
        df["weight"] = cp_weight

    return df


def ktruss_subgraph(input_graph, k: int) -> dask_cudf.DataFrame:
    """
    Returns the K-Truss subgraph of a graph for a specific k.

    The k-truss of a graph is a subgraph where each edge is incident to at
    least (k−2) triangles. K-trusses are used for finding tightly knit groups
    of vertices in a graph. A k-truss is a relaxation of a k-clique in the
    graph. Finding cliques is computationally demanding and finding the
    maximal k-clique is known to be NP-Hard.

    Parameters
    ----------
    input_graph : cugraph.Graph
        Graph object, which should contain the connectivity information.
        Must be undirected. Edge weights, if present, should be single or
        double precision floating point values.

    k : int
        The desired k to be used for extracting the k-truss subgraph.

    Returns
    -------
    k_truss_edge_lists : dask_cudf.DataFrame
        Distributed GPU data frame containing all source identifiers
        ("src"), destination identifiers ("dst"), and edge weights
        ("weight", when available) belonging to the truss.

    Raises
    ------
    ValueError
        If ``input_graph`` is directed.
    """
    if input_graph.is_directed():
        raise ValueError("input graph must be undirected")

    # Initialize dask client
    dask_client = default_client()

    # Submit one k-truss task per worker, each restricted to that worker and
    # given that worker's entry of the graph.
    truss_futures = []
    for worker in Comms.get_workers():
        truss_futures.append(
            dask_client.submit(
                _call_k_truss_subgraph,
                Comms.get_session_id(),
                input_graph._plc_graph[worker],
                k,
                False,  # do_expensive_check
                workers=[worker],
                allow_other_workers=False,
            )
        )
    wait(truss_futures)

    # Turn each per-worker array tuple into a cudf DataFrame.
    frame_futures = []
    for truss_future in truss_futures:
        frame_futures.append(dask_client.submit(convert_to_cudf, truss_future))
    wait(frame_futures)

    ddf = dask_cudf.from_delayed(frame_futures).persist()
    wait(ddf)

    # Wait until the inactive futures are released
    released = []
    for truss_future, frame_future in zip(truss_futures, frame_futures):
        released.append((truss_future.release(), frame_future.release()))
    wait(released)

    # Map internal vertex ids back to the caller's original ids.
    if input_graph.renumbered:
        for column in ("src", "dst"):
            ddf = input_graph.unrenumber(ddf, column)

    return ddf
34 changes: 0 additions & 34 deletions python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import cugraph
from cugraph.testing import utils
from cugraph.datasets import polbooks, karate_asymmetric
from numba import cuda


# =============================================================================
Expand Down Expand Up @@ -67,32 +66,7 @@ def compare_k_truss(k_truss_cugraph, k, ground_truth_file):
return True


__cuda_version = cuda.runtime.get_version()
__unsupported_cuda_version = (11, 4)


# FIXME: remove when ktruss is supported on CUDA 11.4
@pytest.mark.sg
def test_unsupported_cuda_version():
    """
    Check k_truss against the CUDA-version guard: on the unsupported CUDA
    version it must raise NotImplementedError, on any other version the call
    must complete without raising.
    """
    k = 5
    G = polbooks.get_graph(download=True)

    if __cuda_version != __unsupported_cuda_version:
        # Supported environment: the call only has to succeed.
        cugraph.k_truss(G, k)
        return

    with pytest.raises(NotImplementedError):
        cugraph.k_truss(G, k)


@pytest.mark.sg
@pytest.mark.skipif(
(__cuda_version == __unsupported_cuda_version),
reason="skipping on unsupported CUDA " f"{__unsupported_cuda_version} environment.",
)
@pytest.mark.parametrize("_, nx_ground_truth", utils.DATASETS_KTRUSS)
def test_ktruss_subgraph_Graph(_, nx_ground_truth):

Expand All @@ -104,10 +78,6 @@ def test_ktruss_subgraph_Graph(_, nx_ground_truth):


@pytest.mark.sg
@pytest.mark.skipif(
(__cuda_version == __unsupported_cuda_version),
reason="skipping on unsupported CUDA " f"{__unsupported_cuda_version} environment.",
)
def test_ktruss_subgraph_Graph_nx():
k = 5
dataset_path = polbooks.get_path()
Expand All @@ -122,10 +92,6 @@ def test_ktruss_subgraph_Graph_nx():


@pytest.mark.sg
@pytest.mark.skipif(
(__cuda_version == __unsupported_cuda_version),
reason="skipping on unsupported CUDA " f"{__unsupported_cuda_version} environment.",
)
def test_ktruss_subgraph_directed_Graph():
k = 5
edgevals = True
Expand Down
Loading

0 comments on commit 7b81173

Please sign in to comment.