Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nx-cugraph: add weakly connected components #4071

Merged
merged 8 commits into from
Jan 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions python/nx-cugraph/_nx_cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@
"in_degree_centrality",
"is_connected",
"is_isolate",
"is_strongly_connected",
"is_weakly_connected",
"isolates",
"k_truss",
"karate_club_graph",
Expand All @@ -85,6 +87,8 @@
"number_connected_components",
"number_of_isolates",
"number_of_selfloops",
"number_strongly_connected_components",
"number_weakly_connected_components",
"octahedral_graph",
"out_degree_centrality",
"pagerank",
Expand All @@ -95,13 +99,15 @@
"single_source_shortest_path_length",
"single_target_shortest_path_length",
"star_graph",
"strongly_connected_components",
"tadpole_graph",
"tetrahedral_graph",
"trivial_graph",
"truncated_cube_graph",
"truncated_tetrahedron_graph",
"turan_graph",
"tutte_graph",
"weakly_connected_components",
"wheel_graph",
# END: functions
},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -11,3 +11,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .connected import *
from .strongly_connected import *
from .weakly_connected import *
22 changes: 19 additions & 3 deletions python/nx-cugraph/nx_cugraph/algorithms/components/connected.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,15 @@
@networkx_algorithm(plc="weakly_connected_components", version_added="23.12")
def number_connected_components(G):
    # Convert the input to an undirected graph, then let the shared helper
    # (which also accepts a `symmetrize` option) do the counting.
    undirected = _to_undirected_graph(G)
    return _number_connected_components(undirected)


def _number_connected_components(G, symmetrize=None):
if G.src_indices.size == 0:
return len(G)
unused_node_ids, labels = plc.weakly_connected_components(
resource_handle=plc.ResourceHandle(),
graph=G._get_plc_graph(),
graph=G._get_plc_graph(symmetrize=symmetrize),
offsets=None,
indices=None,
weights=None,
Expand All @@ -54,11 +60,15 @@ def _(G):
@networkx_algorithm(plc="weakly_connected_components", version_added="23.12")
def connected_components(G):
    # Convert the input to an undirected graph, then let the shared helper
    # (which also accepts a `symmetrize` option) build the components.
    undirected = _to_undirected_graph(G)
    return _connected_components(undirected)


def _connected_components(G, symmetrize=None):
if G.src_indices.size == 0:
return [{key} for key in G._nodeiter_to_iter(range(len(G)))]
node_ids, labels = plc.weakly_connected_components(
resource_handle=plc.ResourceHandle(),
graph=G._get_plc_graph(),
graph=G._get_plc_graph(symmetrize=symmetrize),
offsets=None,
indices=None,
weights=None,
Expand All @@ -73,13 +83,19 @@ def connected_components(G):
@networkx_algorithm(plc="weakly_connected_components", version_added="23.12")
def is_connected(G):
    # Convert the input to an undirected graph, then let the shared helper
    # (which also accepts a `symmetrize` option) decide connectivity.
    undirected = _to_undirected_graph(G)
    return _is_connected(undirected)


def _is_connected(G, symmetrize=None):
if len(G) == 0:
raise nx.NetworkXPointlessConcept(
"Connectivity is undefined for the null graph."
)
if G.src_indices.size == 0:
return len(G) == 1
unused_node_ids, labels = plc.weakly_connected_components(
resource_handle=plc.ResourceHandle(),
graph=G._get_plc_graph(),
graph=G._get_plc_graph(symmetrize=symmetrize),
offsets=None,
indices=None,
weights=None,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cupy as cp
import networkx as nx
import pylibcugraph as plc

from nx_cugraph.convert import _to_directed_graph
from nx_cugraph.utils import (
_groupby,
index_dtype,
networkx_algorithm,
not_implemented_for,
)

__all__ = [
"number_strongly_connected_components",
"strongly_connected_components",
"is_strongly_connected",
]


def _strongly_connected_components(G):
    """Return an array holding one component label per node of ``G``.

    The edge list is reordered by source (ties broken by destination) and
    turned into offsets/indices arrays, which are handed to
    ``plc.strongly_connected_components`` together with a zero-initialized
    ``labels`` array of length ``len(G)``. That array is what gets returned;
    the PLC call is expected to fill it in place.
    """
    # TODO: create utility function to convert just the indices to CSR
    # TODO: this uses a legacy PLC function (strongly_connected_components)
    num_nodes = len(G)
    # lexsort uses the last key as the primary one: sort by src, then by dst.
    edge_order = cp.lexsort(cp.vstack((G.dst_indices, G.src_indices)))
    sorted_dst = G.dst_indices[edge_order]
    # Position of the first edge of each node id 0..num_nodes in sorted order.
    node_bounds = cp.arange(num_nodes + 1, dtype=index_dtype)
    row_offsets = cp.searchsorted(G.src_indices, node_bounds, sorter=edge_order)
    row_offsets = row_offsets.astype(index_dtype)
    component_labels = cp.zeros(num_nodes, dtype=index_dtype)
    plc.strongly_connected_components(
        offsets=row_offsets,
        indices=sorted_dst,
        weights=None,
        num_verts=num_nodes,
        num_edges=sorted_dst.size,
        labels=component_labels,
    )
    return component_labels


@not_implemented_for("undirected")
@networkx_algorithm(version_added="24.02", plc="strongly_connected_components")
def strongly_connected_components(G):
    G = _to_directed_graph(G)
    num_nodes = len(G)
    if G.src_indices.size == 0:
        # No edges at all: every node forms its own single-element component.
        return [{node} for node in G._nodeiter_to_iter(range(num_nodes))]
    component_labels = _strongly_connected_components(G)
    # Group node indices by their component label.
    node_indices = cp.arange(num_nodes, dtype=index_dtype)
    members_by_label = _groupby(component_labels, node_indices)
    return (
        G._nodearray_to_set(member_ids) for member_ids in members_by_label.values()
    )


@not_implemented_for("undirected")
@networkx_algorithm(version_added="24.02", plc="strongly_connected_components")
def number_strongly_connected_components(G):
G = _to_directed_graph(G)
if G.src_indices.size == 0:
return len(G)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just curious, no action needed: is this expected to always return 0 in this case? If so, is there a reason calling len() is preferred over just returning 0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I should use G.number_of_edges() instead of G.src_indices.size (but for some reason the latter is easier for me to remember and reason about). Anyway, if the number of edges is zero, then the number of components is the number of nodes, hence we can't simply return 0.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I may update to use number_of_edges lots of places for clarity in a different PR. I agree this shouldn't hold up this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I see, number_of_edges actually does a lot more work. If we want to know if there are exactly 0 edges, G.src_indices.size works great.

labels = _strongly_connected_components(G)
return cp.unique(labels).size


@not_implemented_for("undirected")
@networkx_algorithm(version_added="24.02", plc="strongly_connected_components")
def is_strongly_connected(G):
    G = _to_directed_graph(G)
    num_nodes = len(G)
    if num_nodes == 0:
        raise nx.NetworkXPointlessConcept(
            "Connectivity is undefined for the null graph."
        )
    if G.src_indices.size == 0:
        # Without edges, only a single-node graph counts as connected.
        return num_nodes == 1
    component_labels = _strongly_connected_components(G)
    # Connected exactly when every node carries the same label as node 0.
    shares_first_label = component_labels == component_labels[0]
    return bool(shares_first_label.all())
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from nx_cugraph.convert import _to_directed_graph
from nx_cugraph.utils import networkx_algorithm, not_implemented_for

from .connected import (
_connected_components,
_is_connected,
_number_connected_components,
)

__all__ = [
"number_weakly_connected_components",
"weakly_connected_components",
"is_weakly_connected",
]


@not_implemented_for("undirected")
@networkx_algorithm(plc="weakly_connected_components", version_added="24.02")
def weakly_connected_components(G):
    # Reuse the connected-components helper, asking it to symmetrize the
    # directed edges with the "union" mode.
    digraph = _to_directed_graph(G)
    return _connected_components(digraph, symmetrize="union")


@not_implemented_for("undirected")
@networkx_algorithm(plc="weakly_connected_components", version_added="24.02")
def number_weakly_connected_components(G):
    # Reuse the component-counting helper, asking it to symmetrize the
    # directed edges with the "union" mode.
    digraph = _to_directed_graph(G)
    return _number_connected_components(digraph, symmetrize="union")


@not_implemented_for("undirected")
@networkx_algorithm(plc="weakly_connected_components", version_added="24.02")
def is_weakly_connected(G):
    # Reuse the connectivity helper, asking it to symmetrize the directed
    # edges with the "union" mode.
    digraph = _to_directed_graph(G)
    return _is_connected(digraph, symmetrize="union")
24 changes: 17 additions & 7 deletions python/nx-cugraph/nx_cugraph/algorithms/isolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@
from typing import TYPE_CHECKING

import cupy as cp
import numpy as np

from nx_cugraph.convert import _to_graph
from nx_cugraph.utils import networkx_algorithm
from nx_cugraph.utils import index_dtype, networkx_algorithm

if TYPE_CHECKING: # pragma: no cover
from nx_cugraph.typing import IndexValue
Expand All @@ -36,19 +37,28 @@ def is_isolate(G, n):
)


def _mark_isolates(G) -> cp.ndarray[bool]:
def _mark_isolates(G, symmetrize=None) -> cp.ndarray[bool]:
"""Return a boolean mask array indicating indices of isolated nodes."""
mark_isolates = cp.ones(len(G), bool)
mark_isolates[G.src_indices] = False
if G.is_directed():
mark_isolates[G.dst_indices] = False
if G.is_directed() and symmetrize == "intersection":
N = G._N
# Upcast to int64 so indices don't overflow
src_dst = N * G.src_indices.astype(np.int64) + G.dst_indices
src_dst_T = G.src_indices + N * G.dst_indices.astype(np.int64)
src_dst_new = cp.intersect1d(src_dst, src_dst_T)
new_indices = cp.floor_divide(src_dst_new, N, dtype=index_dtype)
mark_isolates[new_indices] = False
else:
mark_isolates[G.src_indices] = False
if G.is_directed():
mark_isolates[G.dst_indices] = False
return mark_isolates


def _isolates(G) -> cp.ndarray[IndexValue]:
def _isolates(G, symmetrize=None) -> cp.ndarray[IndexValue]:
"""Like isolates, but return an array of indices instead of an iterator of nodes."""
G = _to_graph(G)
return cp.nonzero(_mark_isolates(G))[0]
return cp.nonzero(_mark_isolates(G, symmetrize=symmetrize))[0]


@networkx_algorithm(version_added="23.10")
Expand Down
23 changes: 21 additions & 2 deletions python/nx-cugraph/nx_cugraph/classes/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,7 @@ def _get_plc_graph(
store_transposed: bool = False,
switch_indices: bool = False,
edge_array: cp.ndarray[EdgeValue] | None = None,
symmetrize: str | None = None,
):
if edge_array is not None or edge_attr is None:
pass
Expand Down Expand Up @@ -650,12 +651,30 @@ def _get_plc_graph(
dst_indices = self.dst_indices
if switch_indices:
src_indices, dst_indices = dst_indices, src_indices
if symmetrize is not None:
if edge_array is not None:
raise NotImplementedError(
"edge_array must be None when symmetrizing the graph"
)
N = self._N
# Upcast to int64 so indices don't overflow
src_dst = N * src_indices.astype(np.int64) + dst_indices
src_dst_T = src_indices + N * dst_indices.astype(np.int64)
if symmetrize == "union":
src_dst_new = cp.union1d(src_dst, src_dst_T)
elif symmetrize == "intersection":
src_dst_new = cp.intersect1d(src_dst, src_dst_T)
else:
raise ValueError(
f'symmetrize must be "union" or "intersection"; got "{symmetrize}"'
)
src_indices, dst_indices = cp.divmod(src_dst_new, N, dtype=index_dtype)

return plc.SGGraph(
resource_handle=plc.ResourceHandle(),
graph_properties=plc.GraphProperties(
is_multigraph=self.is_multigraph(),
is_symmetric=not self.is_directed(),
is_multigraph=self.is_multigraph() and symmetrize is None,
is_symmetric=not self.is_directed() or symmetrize is not None,
),
src_or_offset_array=src_indices,
dst_or_index_array=dst_indices,
Expand Down
7 changes: 6 additions & 1 deletion python/nx-cugraph/nx_cugraph/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,12 @@ def key(testpath):
louvain_different = "Louvain may be different due to RNG"
no_string_dtype = "string edge values not currently supported"

xfail = {}
xfail = {
key(
"test_strongly_connected.py:"
"TestStronglyConnected.test_condensation_mapping_and_members"
): "Strongly connected groups in different iteration order",
}

from packaging.version import parse

Expand Down
Loading