Skip to content

Commit

Permalink
nx-cugraph: add weakly connected components (#4071)
Browse files Browse the repository at this point in the history
This doesn't currently work, because `plc.weakly_connected_components` only works on symmetric graphs (so it isn't actually performing WCC, is it?):

> RuntimeError: non-success value returned from cugraph_weakly_connected_components: CUGRAPH_UNKNOWN_ERROR cuGraph failure at file=[...]/cugraph/cpp/src/components/weakly_connected_components_impl.cuh line=283: Invalid input argument: input graph should be symmetric for weakly connected components.

_These are high-priority algorithms for `nx-cugraph`, because they are widely used by networkx dependents._

Authors:
  - Erik Welch (https://github.com/eriknw)

Approvers:
  - Rick Ratzel (https://github.com/rlratzel)

URL: #4071
  • Loading branch information
eriknw authored Jan 17, 2024
1 parent 4748ca1 commit 8672534
Show file tree
Hide file tree
Showing 8 changed files with 204 additions and 14 deletions.
6 changes: 6 additions & 0 deletions python/nx-cugraph/_nx_cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@
"in_degree_centrality",
"is_connected",
"is_isolate",
"is_strongly_connected",
"is_weakly_connected",
"isolates",
"k_truss",
"karate_club_graph",
Expand All @@ -85,6 +87,8 @@
"number_connected_components",
"number_of_isolates",
"number_of_selfloops",
"number_strongly_connected_components",
"number_weakly_connected_components",
"octahedral_graph",
"out_degree_centrality",
"pagerank",
Expand All @@ -95,13 +99,15 @@
"single_source_shortest_path_length",
"single_target_shortest_path_length",
"star_graph",
"strongly_connected_components",
"tadpole_graph",
"tetrahedral_graph",
"trivial_graph",
"truncated_cube_graph",
"truncated_tetrahedron_graph",
"turan_graph",
"tutte_graph",
"weakly_connected_components",
"wheel_graph",
# END: functions
},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -11,3 +11,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .connected import *
from .strongly_connected import *
from .weakly_connected import *
22 changes: 19 additions & 3 deletions python/nx-cugraph/nx_cugraph/algorithms/components/connected.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,15 @@
@networkx_algorithm(plc="weakly_connected_components", version_added="23.12")
def number_connected_components(G):
    # Convert the input to an undirected graph, then defer to the private
    # helper that performs the actual component count.
    return _number_connected_components(_to_undirected_graph(G))


def _number_connected_components(G, symmetrize=None):
if G.src_indices.size == 0:
return len(G)
unused_node_ids, labels = plc.weakly_connected_components(
resource_handle=plc.ResourceHandle(),
graph=G._get_plc_graph(),
graph=G._get_plc_graph(symmetrize=symmetrize),
offsets=None,
indices=None,
weights=None,
Expand All @@ -54,11 +60,15 @@ def _(G):
@networkx_algorithm(plc="weakly_connected_components", version_added="23.12")
def connected_components(G):
    # Convert the input to an undirected graph, then defer to the private
    # helper that builds the components.
    return _connected_components(_to_undirected_graph(G))


def _connected_components(G, symmetrize=None):
if G.src_indices.size == 0:
return [{key} for key in G._nodeiter_to_iter(range(len(G)))]
node_ids, labels = plc.weakly_connected_components(
resource_handle=plc.ResourceHandle(),
graph=G._get_plc_graph(),
graph=G._get_plc_graph(symmetrize=symmetrize),
offsets=None,
indices=None,
weights=None,
Expand All @@ -73,13 +83,19 @@ def connected_components(G):
@networkx_algorithm(plc="weakly_connected_components", version_added="23.12")
def is_connected(G):
    # Convert the input to an undirected graph, then defer to the private
    # helper that performs the connectivity check.
    return _is_connected(_to_undirected_graph(G))


def _is_connected(G, symmetrize=None):
if len(G) == 0:
raise nx.NetworkXPointlessConcept(
"Connectivity is undefined for the null graph."
)
if G.src_indices.size == 0:
return len(G) == 1
unused_node_ids, labels = plc.weakly_connected_components(
resource_handle=plc.ResourceHandle(),
graph=G._get_plc_graph(),
graph=G._get_plc_graph(symmetrize=symmetrize),
offsets=None,
indices=None,
weights=None,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cupy as cp
import networkx as nx
import pylibcugraph as plc

from nx_cugraph.convert import _to_directed_graph
from nx_cugraph.utils import (
_groupby,
index_dtype,
networkx_algorithm,
not_implemented_for,
)

__all__ = [
"number_strongly_connected_components",
"strongly_connected_components",
"is_strongly_connected",
]


def _strongly_connected_components(G):
    """Return one component label per node index of ``G``.

    The edges are ordered by (source, destination) and converted to CSR-style
    ``offsets``/``indices`` arrays, which are passed to
    ``plc.strongly_connected_components`` together with a zero-initialized
    ``labels`` array. That same array is returned afterwards, so the PLC call
    is presumably expected to fill it in place.
    """
    # TODO: create utility function to convert just the indices to CSR
    # TODO: this uses a legacy PLC function (strongly_connected_components)
    num_nodes = len(G)
    # lexsort treats the last row as the primary key: sort by src, then dst.
    edge_order = cp.lexsort(cp.vstack((G.dst_indices, G.src_indices)))
    csr_indices = G.dst_indices[edge_order]
    # Position of each node id (plus one past the end) within the sorted
    # source indices gives the row offsets.
    row_bounds = cp.arange(num_nodes + 1, dtype=index_dtype)
    csr_offsets = cp.searchsorted(G.src_indices, row_bounds, sorter=edge_order)
    csr_offsets = csr_offsets.astype(index_dtype)
    component_labels = cp.zeros(num_nodes, dtype=index_dtype)
    plc.strongly_connected_components(
        offsets=csr_offsets,
        indices=csr_indices,
        weights=None,
        num_verts=num_nodes,
        num_edges=csr_indices.size,
        labels=component_labels,
    )
    return component_labels


@not_implemented_for("undirected")
@networkx_algorithm(version_added="24.02", plc="strongly_connected_components")
def strongly_connected_components(G):
    # Produces one set of nodes per strongly connected component.
    G = _to_directed_graph(G)
    if G.src_indices.size == 0:
        # No edges: every node is its own singleton component.
        return [{node} for node in G._nodeiter_to_iter(range(len(G)))]
    component_labels = _strongly_connected_components(G)
    node_indices = cp.arange(len(G), dtype=index_dtype)
    # Group node indices by their component label.
    by_label = _groupby(component_labels, node_indices)
    return (G._nodearray_to_set(members) for members in by_label.values())


@not_implemented_for("undirected")
@networkx_algorithm(version_added="24.02", plc="strongly_connected_components")
def number_strongly_connected_components(G):
    # Counts the distinct component labels; with no edges at all, every node
    # is counted as its own component.
    G = _to_directed_graph(G)
    if G.src_indices.size != 0:
        component_labels = _strongly_connected_components(G)
        return cp.unique(component_labels).size
    return len(G)


@not_implemented_for("undirected")
@networkx_algorithm(version_added="24.02", plc="strongly_connected_components")
def is_strongly_connected(G):
    # True when every node carries the same component label.
    # Raises nx.NetworkXPointlessConcept for a graph with no nodes.
    G = _to_directed_graph(G)
    num_nodes = len(G)
    if num_nodes == 0:
        raise nx.NetworkXPointlessConcept(
            "Connectivity is undefined for the null graph."
        )
    if G.src_indices.size == 0:
        # Without edges, only a single-node graph is connected.
        return num_nodes == 1
    component_labels = _strongly_connected_components(G)
    first_label = component_labels[0]
    return bool((component_labels == first_label).all())
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from nx_cugraph.convert import _to_directed_graph
from nx_cugraph.utils import networkx_algorithm, not_implemented_for

from .connected import (
_connected_components,
_is_connected,
_number_connected_components,
)

__all__ = [
"number_weakly_connected_components",
"weakly_connected_components",
"is_weakly_connected",
]


@not_implemented_for("undirected")
@networkx_algorithm(plc="weakly_connected_components", version_added="24.02")
def weakly_connected_components(G):
    # Reuse the shared connected-components helper on the directed graph,
    # asking it to symmetrize the edges with the "union" mode.
    return _connected_components(_to_directed_graph(G), symmetrize="union")


@not_implemented_for("undirected")
@networkx_algorithm(plc="weakly_connected_components", version_added="24.02")
def number_weakly_connected_components(G):
    # Reuse the shared component-counting helper on the directed graph,
    # asking it to symmetrize the edges with the "union" mode.
    return _number_connected_components(_to_directed_graph(G), symmetrize="union")


@not_implemented_for("undirected")
@networkx_algorithm(plc="weakly_connected_components", version_added="24.02")
def is_weakly_connected(G):
    # Reuse the shared connectivity helper on the directed graph, asking it
    # to symmetrize the edges with the "union" mode.
    return _is_connected(_to_directed_graph(G), symmetrize="union")
24 changes: 17 additions & 7 deletions python/nx-cugraph/nx_cugraph/algorithms/isolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@
from typing import TYPE_CHECKING

import cupy as cp
import numpy as np

from nx_cugraph.convert import _to_graph
from nx_cugraph.utils import networkx_algorithm
from nx_cugraph.utils import index_dtype, networkx_algorithm

if TYPE_CHECKING: # pragma: no cover
from nx_cugraph.typing import IndexValue
Expand All @@ -36,19 +37,28 @@ def is_isolate(G, n):
)


def _mark_isolates(G) -> cp.ndarray[bool]:
def _mark_isolates(G, symmetrize=None) -> cp.ndarray[bool]:
"""Return a boolean mask array indicating indices of isolated nodes."""
mark_isolates = cp.ones(len(G), bool)
mark_isolates[G.src_indices] = False
if G.is_directed():
mark_isolates[G.dst_indices] = False
if G.is_directed() and symmetrize == "intersection":
N = G._N
# Upcast to int64 so indices don't overflow
src_dst = N * G.src_indices.astype(np.int64) + G.dst_indices
src_dst_T = G.src_indices + N * G.dst_indices.astype(np.int64)
src_dst_new = cp.intersect1d(src_dst, src_dst_T)
new_indices = cp.floor_divide(src_dst_new, N, dtype=index_dtype)
mark_isolates[new_indices] = False
else:
mark_isolates[G.src_indices] = False
if G.is_directed():
mark_isolates[G.dst_indices] = False
return mark_isolates


def _isolates(G) -> cp.ndarray[IndexValue]:
def _isolates(G, symmetrize=None) -> cp.ndarray[IndexValue]:
"""Like isolates, but return an array of indices instead of an iterator of nodes."""
G = _to_graph(G)
return cp.nonzero(_mark_isolates(G))[0]
return cp.nonzero(_mark_isolates(G, symmetrize=symmetrize))[0]


@networkx_algorithm(version_added="23.10")
Expand Down
23 changes: 21 additions & 2 deletions python/nx-cugraph/nx_cugraph/classes/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,7 @@ def _get_plc_graph(
store_transposed: bool = False,
switch_indices: bool = False,
edge_array: cp.ndarray[EdgeValue] | None = None,
symmetrize: str | None = None,
):
if edge_array is not None or edge_attr is None:
pass
Expand Down Expand Up @@ -650,12 +651,30 @@ def _get_plc_graph(
dst_indices = self.dst_indices
if switch_indices:
src_indices, dst_indices = dst_indices, src_indices
if symmetrize is not None:
if edge_array is not None:
raise NotImplementedError(
"edge_array must be None when symmetrizing the graph"
)
N = self._N
# Upcast to int64 so indices don't overflow
src_dst = N * src_indices.astype(np.int64) + dst_indices
src_dst_T = src_indices + N * dst_indices.astype(np.int64)
if symmetrize == "union":
src_dst_new = cp.union1d(src_dst, src_dst_T)
elif symmetrize == "intersection":
src_dst_new = cp.intersect1d(src_dst, src_dst_T)
else:
raise ValueError(
f'symmetrize must be "union" or "intersection"; got "{symmetrize}"'
)
src_indices, dst_indices = cp.divmod(src_dst_new, N, dtype=index_dtype)

return plc.SGGraph(
resource_handle=plc.ResourceHandle(),
graph_properties=plc.GraphProperties(
is_multigraph=self.is_multigraph(),
is_symmetric=not self.is_directed(),
is_multigraph=self.is_multigraph() and symmetrize is None,
is_symmetric=not self.is_directed() or symmetrize is not None,
),
src_or_offset_array=src_indices,
dst_or_index_array=dst_indices,
Expand Down
7 changes: 6 additions & 1 deletion python/nx-cugraph/nx_cugraph/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,12 @@ def key(testpath):
louvain_different = "Louvain may be different due to RNG"
no_string_dtype = "string edge values not currently supported"

xfail = {}
xfail = {
key(
"test_strongly_connected.py:"
"TestStronglyConnected.test_condensation_mapping_and_members"
): "Strongly connected groups in different iteration order",
}

from packaging.version import parse

Expand Down

0 comments on commit 8672534

Please sign in to comment.