rapidsai · rapids-bot · Oct 31, 2023 · Oct 31, 2023 · Oct 31, 2023 · Oct 31, 2023
@@ -38,6 +38,7 @@
  "complete_bipartite_graph",
  "complete_graph",
  "complete_multipartite_graph",
+ "connected_components",
  "cubical_graph",
  "cycle_graph",
  "davis_southern_women_graph",
@@ -56,6 +57,7 @@
  "house_x_graph",
  "icosahedral_graph",
  "in_degree_centrality",
+ "is_connected",
  "is_isolate",
  "isolates",
  "k_truss",
@@ -66,7 +68,9 @@
  "lollipop_graph",
  "louvain_communities",
  "moebius_kantor_graph",
+ "node_connected_component",
  "null_graph",
+ "number_connected_components",
  "number_of_isolates",
  "number_of_selfloops",
  "octahedral_graph",
@@ -91,6 +95,10 @@
  "betweenness_centrality": "`weight` parameter is not yet supported.",
  "edge_betweenness_centrality": "`weight` parameter is not yet supported.",
  "from_pandas_edgelist": "cudf.DataFrame inputs also supported.",
+ "k_truss": (
+ "Currently raises `NotImplementedError` for graphs with more than one connected\n"
+ "component when k >= 3. We expect to fix this soon."
+ ),
  "louvain_communities": "`seed` parameter is currently ignored.",
  # END: extra_docstrings
  },

@@ -10,8 +10,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from . import bipartite, centrality, community
+from . import bipartite, centrality, community, components
 from .bipartite import complete_bipartite_graph
 from .centrality import *
+from .components import *
 from .core import *
 from .isolate import *
@@ -62,7 +62,7 @@ def louvain_communities(
  resolution=resolution,
  do_expensive_check=False,
  )
- groups = _groupby(clusters, vertices)
+ groups = _groupby(clusters, vertices, groups_are_canonical=True)
  rv = [set(G._nodearray_to_list(node_ids)) for node_ids in groups.values()]
  # TODO: PLC doesn't handle isolated vertices yet, so this is a temporary fix
  isolates = _isolates(G)

@@ -0,0 +1,13 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .connected import *
@@ -0,0 +1,130 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import itertools
+
+import cupy as cp
+import networkx as nx
+import pylibcugraph as plc
+
+from nx_cugraph.convert import _to_undirected_graph
+from nx_cugraph.utils import _groupby, networkx_algorithm, not_implemented_for
+
+from ..isolate import _isolates
+
+__all__ = [
+ "number_connected_components",
+ "connected_components",
+ "is_connected",
+ "node_connected_component",
+]
+
+
+# Return the number of connected components of the undirected graph ``G``.
+# NOTE(review): documented with `#` comments rather than a docstring because a
+# docstring on a `networkx_algorithm` function appears to be published as extra
+# NetworkX documentation (compare `k_truss`) -- confirm before converting.
+@not_implemented_for("directed")
+@networkx_algorithm
+def number_connected_components(G):
+ # Count whatever `connected_components` produces; that function already adds
+ # the isolated nodes that the PLC call leaves out.
+ return sum(1 for _ in connected_components(G))
+ # PREFERRED IMPLEMENTATION, BUT PLC DOES NOT HANDLE ISOLATED VERTICES WELL
+ # G = _to_undirected_graph(G)
+ # unused_node_ids, labels = plc.weakly_connected_components(
+ # resource_handle=plc.ResourceHandle(),
+ # graph=G._get_plc_graph(),
+ # offsets=None,
+ # indices=None,
+ # weights=None,
+ # labels=None,
+ # do_expensive_check=False,
+ # )
+ # return cp.unique(labels).size
+
+
+# Hook registered through `number_connected_components._can_run`: returns True
+# only when `G` reports itself as undirected.
+# NOTE(review): presumably consulted by the dispatcher before this backend
+# implementation is chosen -- confirm against `networkx_algorithm`.
+@number_connected_components._can_run
+def _(G):
+ # NetworkX <= 3.2.1 does not check directedness for us
+ try:
+ return not G.is_directed()
+ except Exception:
+ # Any failure while querying `G` is treated as "cannot run" instead of
+ # propagating the error.
+ return False
+
+
+# Return the connected components of the undirected graph ``G``, each as a set
+# of node keys.
+# The result is a list when the graph has no edges and a lazy iterator
+# (generator, possibly wrapped in `itertools.chain`) otherwise.
+# NOTE(review): documented with `#` comments rather than a docstring because a
+# docstring on a `networkx_algorithm` function appears to be published as extra
+# NetworkX documentation (compare `k_truss`) -- confirm before converting.
+@not_implemented_for("directed")
+@networkx_algorithm
+def connected_components(G):
+ G = _to_undirected_graph(G)
+ if G.src_indices.size == 0:
+ # No edges at all: every node forms its own singleton component, so the
+ # PLC call is skipped entirely.
+ # TODO: PLC doesn't handle empty graphs (or isolated nodes) gracefully!
+ return [{key} for key in G._nodeiter_to_iter(range(len(G)))]
+ # `node_ids` and `labels` are used below as parallel arrays: one component
+ # label per returned node id.
+ node_ids, labels = plc.weakly_connected_components(
+ resource_handle=plc.ResourceHandle(),
+ graph=G._get_plc_graph(),
+ offsets=None,
+ indices=None,
+ weights=None,
+ labels=None,
+ do_expensive_check=False,
+ )
+ # Bucket node ids by label; `_groupby` is called with its default handling,
+ # i.e. labels are not assumed to be consecutive integers from 0.
+ groups = _groupby(labels, node_ids)
+ it = (G._nodearray_to_set(connected_ids) for connected_ids in groups.values())
+ # TODO: PLC doesn't handle isolated vertices yet, so this is a temporary fix
+ isolates = _isolates(G)
+ if isolates.size > 0:
+ # Keep only isolates whose id is larger than every id PLC returned.
+ # NOTE(review): this assumes PLC omits only trailing isolated nodes and
+ # still reports isolates with smaller ids -- TODO confirm.
+ isolates = isolates[isolates > node_ids.max()]
+ if isolates.size > 0:
+ # Append each remaining isolate as its own singleton component.
+ it = itertools.chain(
+ it, ({node} for node in G._nodearray_to_list(isolates))
+ )
+ return it
+
+
+# Return True when the undirected graph ``G`` consists of exactly one connected
+# component.
+# Raises `nx.NetworkXPointlessConcept` when ``G`` has no nodes.
+# NOTE(review): documented with `#` comments rather than a docstring because a
+# docstring on a `networkx_algorithm` function appears to be published as extra
+# NetworkX documentation (compare `k_truss`) -- confirm before converting.
+@not_implemented_for("directed")
+@networkx_algorithm
+def is_connected(G):
+ G = _to_undirected_graph(G)
+ if len(G) == 0:
+ raise nx.NetworkXPointlessConcept(
+ "Connectivity is undefined for the null graph."
+ )
+ # Only the first component is inspected: the graph is connected exactly when
+ # that component already contains every node.
+ for community in connected_components(G):
+ return len(community) == len(G)
+ # Only reached if `connected_components` yields nothing for a non-empty
+ # graph, which is not expected to happen.
+ raise RuntimeError # pragma: no cover
+ # PREFERRED IMPLEMENTATION, BUT PLC DOES NOT HANDLE ISOLATED VERTICES WELL
+ # unused_node_ids, labels = plc.weakly_connected_components(
+ # resource_handle=plc.ResourceHandle(),
+ # graph=G._get_plc_graph(),
+ # offsets=None,
+ # indices=None,
+ # weights=None,
+ # labels=None,
+ # do_expensive_check=False,
+ # )
+ # return labels.size == len(G) and cp.unique(labels).size == 1
+
+
+# Return the set of node keys in the connected component that contains node
+# ``n`` of the undirected graph ``G``.
+#
+# Parameters: ``G`` is the graph (converted with `_to_undirected_graph`), ``n``
+# is a node key.
+# Returns: a set of node keys; ``{n}`` when ``n`` has no edges.
+# Raises: whatever ``G.key_to_id[n]`` raises for an unknown key when the graph
+# has a key mapping (presumably ``KeyError``).
+# NOTE(review): documented with `#` comments rather than a docstring because a
+# docstring on a `networkx_algorithm` function appears to be published as extra
+# NetworkX documentation (compare `k_truss`) -- confirm before converting.
+@not_implemented_for("directed")
+@networkx_algorithm
+def node_connected_component(G, n):
+    # We could also do plain BFS from n
+    G = _to_undirected_graph(G)
+    # The key lookup stays first so an unknown key still fails the same way.
+    node_id = n if G.key_to_id is None else G.key_to_id[n]
+    if G.src_indices.size == 0:
+        # Same guard as `connected_components`: PLC doesn't handle graphs
+        # without edges gracefully. With no edges every node is isolated, so
+        # the component is just the node itself (the same answer the fallback
+        # below gives when PLC does not report the node).
+        return {n}
+    node_ids, labels = plc.weakly_connected_components(
+        resource_handle=plc.ResourceHandle(),
+        graph=G._get_plc_graph(),
+        offsets=None,
+        indices=None,
+        weights=None,
+        labels=None,
+        do_expensive_check=False,
+    )
+    # Locate `n` among the ids PLC returned.
+    indices = cp.nonzero(node_ids == node_id)[0]
+    if indices.size == 0:
+        # PLC did not report the node (e.g. an isolated node): it is its own
+        # component.
+        return {n}
+    # Select every node sharing the label of `n` and map ids back to keys.
+    return G._nodearray_to_set(node_ids[labels == labels[indices[0]]])
@@ -24,13 +24,18 @@
 @not_implemented_for("multigraph")
 @networkx_algorithm
 def k_truss(G, k):
+ """
+ Currently raises `NotImplementedError` for graphs with more than one connected
+ component when k >= 3. We expect to fix this soon.
+ """
  if is_nx := isinstance(G, nx.Graph):
  G = nxcg.from_networkx(G, preserve_all_attrs=True)
  if nxcg.number_of_selfloops(G) > 0:
  raise nx.NetworkXError(
  "Input graph has self loops which is not permitted; "
  "Consider using G.remove_edges_from(nx.selfloop_edges(G))."
  )
+
  # TODO: create renumbering helper function(s)
  if k < 3:
  # k-truss graph is comprised of nodes incident on k-2 triangles, so k<3 is a
@@ -49,6 +54,11 @@ def k_truss(G, k):
  # Renumber step 1: edge values (no changes needed)
  edge_values = {key: val.copy() for key, val in G.edge_values.items()}
  edge_masks = {key: val.copy() for key, val in G.edge_masks.items()}
+ elif (ncc := nxcg.number_connected_components(G)) > 1:
+ raise NotImplementedError(
+ "nx_cugraph.k_truss does not yet work on graphs with more than one "
+ f"connected component (this graph has {ncc}). We expect to fix this soon."
+ )
  else:
  edge_dtype = _get_int_dtype(G.src_indices.size - 1)
  edge_indices = cp.arange(G.src_indices.size, dtype=edge_dtype)

@@ -692,6 +692,11 @@ def _nodearray_to_list(self, node_ids: cp.ndarray[IndexValue]) -> list[NodeKey]:
  return node_ids.tolist()
  return list(self._nodeiter_to_iter(node_ids.tolist()))
 
+ def _nodearray_to_set(self, node_ids: cp.ndarray[IndexValue]) -> set[NodeKey]:
+ """Convert an array of node IDs into a Python set of node keys."""
+ if self.key_to_id is None:
+ # No key mapping on this graph: the integer IDs are returned as the keys.
+ return set(node_ids.tolist())
+ # Otherwise translate each ID through `_nodeiter_to_iter`.
+ return set(self._nodeiter_to_iter(node_ids.tolist()))
+
  def _nodearray_to_dict(
  self, values: cp.ndarray[NodeValue]
  ) -> dict[NodeKey, NodeValue]:

@@ -223,11 +223,20 @@ def key(testpath):
  }
  )
 
+ too_slow = "Too slow to run"
+ skip = {
+ key("test_tree_isomorphism.py:test_positive"): too_slow,
+ key("test_tree_isomorphism.py:test_negative"): too_slow,
+ }
+
  for item in items:
  kset = set(item.keywords)
  for (test_name, keywords), reason in xfail.items():
  if item.name == test_name and keywords.issubset(kset):
  item.add_marker(pytest.mark.xfail(reason=reason))
+ for (test_name, keywords), reason in skip.items():
+ if item.name == test_name and keywords.issubset(kset):
+ item.add_marker(pytest.mark.skip(reason=reason))
 
  @classmethod
  def can_run(cls, name, args, kwargs):

@@ -0,0 +1,30 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import networkx as nx
+import pytest
+
+import nx_cugraph as nxcg
+
+
+# Check that `nxcg.k_truss` matches `nx.k_truss` on small example graphs.
+# NOTE(review): both example graphs presumably have a single connected
+# component; `nxcg.k_truss` raises NotImplementedError for graphs with more than
+# one when k >= 3 -- confirm if more graphs are added here.
+@pytest.mark.parametrize(
+ "get_graph", [nx.florentine_families_graph, nx.les_miserables_graph]
+)
+def test_k_truss(get_graph):
+ Gnx = get_graph()
+ Gcg = nxcg.from_networkx(Gnx, preserve_all_attrs=True)
+ for k in range(10):
+ # Compute the k-truss with both implementations and compare the results
+ # as NetworkX graphs.
+ Hnx = nx.k_truss(Gnx, k)
+ Hcg = nxcg.k_truss(Gcg, k)
+ assert nx.utils.graphs_equal(Hnx, nxcg.to_networkx(Hcg))
+ # Stop early once the NetworkX result has no edges left.
+ if Hnx.number_of_edges() == 0:
+ break
@@ -13,6 +13,7 @@
 from __future__ import annotations
 
 from functools import partial, update_wrapper
+from textwrap import dedent
 
 from networkx.utils.decorators import nodes_or_number, not_implemented_for
 
@@ -65,7 +66,9 @@ def __new__(
  )
  instance.extra_params = extra_params
  # The docstring on our function is added to the NetworkX docstring.
- instance.extra_doc = func.__doc__
+ instance.extra_doc = (
+ dedent(func.__doc__.lstrip("\n").rstrip()) if func.__doc__ else None
+ )
  # Copy __doc__ from NetworkX
  if instance.name in _registered_algorithms:
  instance.__doc__ = _registered_algorithms[instance.name].__doc__

@@ -21,40 +21,56 @@
 import cupy as cp
 import numpy as np
 
+# Use the standard-library `pairwise` when available; otherwise define a local
+# fallback that yields consecutive overlapping pairs (a, b), (b, c), ...
+try:
+ from itertools import pairwise # Python >=3.10
+except ImportError:
+
+ def pairwise(it):
+ # A single shared iterator feeds both loops below.
+ it = iter(it)
+ # The outer loop only pulls the first element; the inner loop consumes
+ # the rest, after which the iterator is exhausted and the outer loop ends.
+ for prev in it:
+ for cur in it:
+ yield (prev, cur)
+ prev = cur
+
+
 __all__ = ["index_dtype", "_groupby", "_seed_to_int", "_get_int_dtype"]
 
 # This may switch to np.uint32 at some point
 index_dtype = np.int32
 
 
-def _groupby(groups: cp.ndarray, values: cp.ndarray) -> dict[int, cp.ndarray]:
+def _groupby(
+ groups: cp.ndarray, values: cp.ndarray, groups_are_canonical: bool = False
+) -> dict[int, cp.ndarray]:
  """Perform a groupby operation given an array of group IDs and array of values.
 
  Parameters
  ----------
  groups : cp.ndarray
  Array that holds the group IDs.
- Group IDs are assumed to be consecutive integers from 0.
  values : cp.ndarray
  Array of values to be grouped according to groups.
  Must be the same size as groups array.
+ groups_are_canonical : bool, default False
+ Whether the group IDs are consecutive integers beginning with 0.
 
  Returns
  -------
  dict with group IDs as keys and cp.ndarray as values.
  """
- # It would actually be easy to support groups that aren't consecutive integers,
- # but let's wait until we need it to implement it.
- sorted_groups = cp.argsort(groups)
- sorted_values = values[sorted_groups]
- rv = {}
- start = 0
- for i, end in enumerate(
- [*(cp.nonzero(cp.diff(groups[sorted_groups]))[0] + 1).tolist(), groups.size]
- ):
- rv[i] = sorted_values[start:end]
- start = end
- return rv
+ # Nothing to group; this also avoids indexing `sorted_groups[0]` below.
+ if groups.size == 0:
+ return {}
+ # Sort by group ID so that equal IDs form contiguous runs.
+ sort_indices = cp.argsort(groups)
+ sorted_groups = groups[sort_indices]
+ sorted_values = values[sort_indices]
+ # `prepend` only has to differ from the first sorted ID so that index 0 is
+ # reported as the start of the first run (canonical IDs start at 0, so 1 works).
+ prepend = 1 if groups_are_canonical else sorted_groups[0] + 1
+ left_bounds = cp.nonzero(cp.diff(sorted_groups, prepend=prepend))[0]
+ # Pair each run start with the next run start (or the array end) to obtain
+ # the [start, end) slice of every group.
+ # NOTE(review): `itertools` is not imported in the visible part of this
+ # module -- confirm it is imported above.
+ boundaries = pairwise(itertools.chain(left_bounds.tolist(), [groups.size]))
+ if groups_are_canonical:
+ # Canonical IDs equal the position of the run, so they can be enumerated
+ # without reading the IDs back from `sorted_groups`.
+ it = enumerate(boundaries)
+ else:
+ # Otherwise use the actual group ID found at the start of each run.
+ it = zip(sorted_groups[left_bounds].tolist(), boundaries)
+ return {group: sorted_values[start:end] for group, (start, end) in it}
 
 
 def _seed_to_int(seed: int | Random | None) -> int:

@@ -218,6 +218,7 @@ ignore = [
 # Allow assert, print, RNG, and no docstring
 "nx_cugraph/**/tests/*py" = ["S101", "S311", "T201", "D103", "D100"]
 "_nx_cugraph/__init__.py" = ["E501"]
+"nx_cugraph/algorithms/**/*py" = ["D205", "D401"] # Allow flexible docstrings for algorithms
 
 [tool.ruff.flake8-annotations]
 mypy-init-return = true