From 063c7a8a995e8b37043a55759ef63d1f431f09e0 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Thu, 28 Sep 2023 16:18:43 -0500 Subject: [PATCH 1/5] nx-cugraph: handle louvain with isolated nodes --- python/nx-cugraph/_nx_cugraph/__init__.py | 3 +- .../nx_cugraph/algorithms/__init__.py | 1 + .../algorithms/community/louvain.py | 24 +++++++++-- .../nx_cugraph/algorithms/isolate.py | 33 +++++++++++++++ python/nx-cugraph/nx_cugraph/classes/graph.py | 4 +- .../nx_cugraph/tests/test_community.py | 42 +++++++++++++++++++ 6 files changed, 100 insertions(+), 7 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/isolate.py create mode 100644 python/nx-cugraph/nx_cugraph/tests/test_community.py diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index 9b3332106ec..224bc1d9060 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -31,6 +31,7 @@ # BEGIN: functions "betweenness_centrality", "edge_betweenness_centrality", + "isolates", "louvain_communities", # END: functions }, @@ -38,7 +39,7 @@ # BEGIN: extra_docstrings "betweenness_centrality": "`weight` parameter is not yet supported.", "edge_betweenness_centrality": "`weight` parameter is not yet supported.", - "louvain_communities": "`threshold` and `seed` parameters are currently ignored.", + "louvain_communities": "`seed` parameter is currently ignored.", # END: extra_docstrings }, "extra_parameters": { diff --git a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py index 3a585452d6d..dfd9adfc61a 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py @@ -12,3 +12,4 @@ # limitations under the License. from . import centrality, community from .centrality import * +from .isolate import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py index a183b59fe1d..4e331002caa 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py @@ -10,7 +10,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import sys +import warnings import pylibcugraph as plc @@ -22,6 +22,8 @@ not_implemented_for, ) +from ..isolate import _isolates + __all__ = ["louvain_communities"] @@ -34,7 +36,7 @@ def louvain_communities( G, weight="weight", resolution=1, threshold=0.0000001, seed=None, *, max_level=None ): - """`threshold` and `seed` parameters are currently ignored.""" + """`seed` parameter is currently ignored.""" # NetworkX allows both directed and undirected, but cugraph only allows undirected. seed = _seed_to_int(seed) # Unused, but ensure it's valid for future compatibility G = _to_undirected_graph(G, weight) @@ -42,7 +44,14 @@ def louvain_communities( # TODO: PLC doesn't handle empty graphs gracefully! return [{key} for key in G._nodeiter_to_iter(range(len(G)))] if max_level is None: - max_level = sys.maxsize + max_level = 500 + elif max_level > 500: + warnings.warn( + f"max_level is set too high (={max_level}), setting it to 500.", + UserWarning, + stacklevel=2, + ) + max_level = 500 vertices, clusters, modularity = plc.louvain( resource_handle=plc.ResourceHandle(), graph=G._get_plc_graph(), @@ -52,7 +61,14 @@ def louvain_communities( do_expensive_check=False, ) groups = _groupby(clusters, vertices) - return [set(G._nodearray_to_list(node_ids)) for node_ids in groups.values()] + rv = [set(G._nodearray_to_list(node_ids)) for node_ids in groups.values()] + # TODO: PLC doesn't handle isolated vertices yet, so this is a temporary fix + isolates = _isolates(G) + if isolates.size > 0: + isolates = isolates[isolates > vertices.max()] + if isolates.size > 0: + rv.extend({node} for node in G._nodearray_to_list(isolates)) + return rv @louvain_communities._can_run diff --git a/python/nx-cugraph/nx_cugraph/algorithms/isolate.py b/python/nx-cugraph/nx_cugraph/algorithms/isolate.py new file mode 100644 index 00000000000..e5cf0ca0fbc --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/isolate.py @@ -0,0 +1,33 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import cupy as cp + +from nx_cugraph.convert import _to_graph +from nx_cugraph.utils import networkx_algorithm + +__all__ = ["isolates"] + + +def _isolates(G) -> cp.ndarray: + G = _to_graph(G) + mark_isolates = cp.ones(len(G), bool) + mark_isolates[G.row_indices] = False + if G.is_directed(): + mark_isolates[G.col_indices] = False + return cp.nonzero(mark_isolates)[0] + + +@networkx_algorithm +def isolates(G): + G = _to_graph(G) + return G._nodeiter_to_iter(iter(_isolates(G).tolist())) diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index 1432f68c752..cd51a02dae8 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -245,9 +245,9 @@ def from_dcsc( def __new__(cls, incoming_graph_data=None, **attr) -> Graph: if incoming_graph_data is None: new_graph = cls.from_coo(0, cp.empty(0, np.int32), cp.empty(0, np.int32)) - elif incoming_graph_data.__class__ is new_graph.__class__: + elif incoming_graph_data.__class__ is cls: new_graph = incoming_graph_data.copy() - elif incoming_graph_data.__class__ is new_graph.to_networkx_class(): + elif incoming_graph_data.__class__ is cls.to_networkx_class(): new_graph = nxcg.from_networkx(incoming_graph_data, preserve_all_attrs=True) else: raise NotImplementedError diff --git a/python/nx-cugraph/nx_cugraph/tests/test_community.py b/python/nx-cugraph/nx_cugraph/tests/test_community.py new file mode 100644 index 00000000000..7dc450eed3d --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_community.py @@ -0,0 +1,42 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import networkx as nx + +import nx_cugraph as nxcg + + +def test_louvain_isolated_nodes(): + def check(left, right): + assert len(left) == len(right) + assert set(map(frozenset, left)) == set(map(frozenset, right)) + + # Empty graph (no nodes) + G = nx.Graph() + nx_result = nx.community.louvain_communities(G) + cg_result = nxcg.community.louvain_communities(G) + check(nx_result, cg_result) + # Graph with no edges + G.add_nodes_from(range(5)) + nx_result = nx.community.louvain_communities(G) + cg_result = nxcg.community.louvain_communities(G) + check(nx_result, cg_result) + # Graph with isolated nodes + G.add_edge(1, 2) + nx_result = nx.community.louvain_communities(G) + cg_result = nxcg.community.louvain_communities(G) + check(nx_result, cg_result) + # Another one + G.add_edge(4, 4) + nx_result = nx.community.louvain_communities(G) + cg_result = nxcg.community.louvain_communities(G) + check(nx_result, cg_result) From aef9670d082069ea22e0e9f0e49134aca41a397d Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Thu, 28 Sep 2023 19:41:05 -0500 Subject: [PATCH 2/5] Document that 500 is max of max_level; also, implement is_isolate and num_isolates --- python/nx-cugraph/_nx_cugraph/__init__.py | 4 ++- .../algorithms/community/louvain.py | 4 ++- .../nx_cugraph/algorithms/isolate.py | 34 +++++++++++++++++-- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index 224bc1d9060..ebd13daded0 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -31,8 +31,10 @@ # BEGIN: functions "betweenness_centrality", "edge_betweenness_centrality", + "is_isolate", "isolates", "louvain_communities", + "number_of_isolates", # END: functions }, "extra_docstrings": { @@ -45,7 +47,7 @@ "extra_parameters": { # BEGIN: extra_parameters "louvain_communities": { - "max_level : int, optional": "Upper limit of the number of macro-iterations.", + "max_level : int, optional": "Upper limit of the number of macro-iterations (max: 500).", }, # END: extra_parameters }, diff --git a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py index 4e331002caa..dc209870c89 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py @@ -30,7 +30,9 @@ @not_implemented_for("directed") @networkx_algorithm( extra_params={ - "max_level : int, optional": "Upper limit of the number of macro-iterations." + "max_level : int, optional": ( + "Upper limit of the number of macro-iterations (max: 500)." + ) } ) def louvain_communities( diff --git a/python/nx-cugraph/nx_cugraph/algorithms/isolate.py b/python/nx-cugraph/nx_cugraph/algorithms/isolate.py index e5cf0ca0fbc..17e8f77087e 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/isolate.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/isolate.py @@ -10,24 +10,52 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +from typing import TYPE_CHECKING + import cupy as cp from nx_cugraph.convert import _to_graph from nx_cugraph.utils import networkx_algorithm -__all__ = ["isolates"] +if TYPE_CHECKING: # pragma: no cover + import numpy as np +__all__ = ["is_isolate", "isolates", "number_of_isolates"] -def _isolates(G) -> cp.ndarray: + +@networkx_algorithm +def is_isolate(G, n): G = _to_graph(G) + index = n if G.key_to_id is None else G.key_to_id[n] + return not ( + (G.row_indices == index).any().tolist() + or G.is_directed() + and (G.col_indices == index).any().tolist() + ) + + +def _mark_isolates(G) -> cp.ndarray[bool]: mark_isolates = cp.ones(len(G), bool) mark_isolates[G.row_indices] = False if G.is_directed(): mark_isolates[G.col_indices] = False - return cp.nonzero(mark_isolates)[0] + return mark_isolates + + +def _isolates(G) -> cp.ndarray[np.int64]: + G = _to_graph(G) + return cp.nonzero(_mark_isolates(G))[0] @networkx_algorithm def isolates(G): G = _to_graph(G) return G._nodeiter_to_iter(iter(_isolates(G).tolist())) + + +@networkx_algorithm +def number_of_isolates(G): + G = _to_graph(G) + return _mark_isolates(G).sum().tolist() From 07b90d17877aebcc09e8c0de437473596a4dbd9c Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Fri, 29 Sep 2023 06:35:53 -0500 Subject: [PATCH 3/5] fix test by handling difference in louvain in networkx <3.2 --- .../nx_cugraph/tests/test_community.py | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/python/nx-cugraph/nx_cugraph/tests/test_community.py b/python/nx-cugraph/nx_cugraph/tests/test_community.py index 7dc450eed3d..126f45c14ae 100644 --- a/python/nx-cugraph/nx_cugraph/tests/test_community.py +++ b/python/nx-cugraph/nx_cugraph/tests/test_community.py @@ -11,25 +11,36 @@ # See the License for the specific language governing permissions and # limitations under the License. import networkx as nx +import pytest import nx_cugraph as nxcg def test_louvain_isolated_nodes(): + is_nx_30_or_31 = hasattr(nx.classes, "backends") + def check(left, right): assert len(left) == len(right) assert set(map(frozenset, left)) == set(map(frozenset, right)) # Empty graph (no nodes) G = nx.Graph() - nx_result = nx.community.louvain_communities(G) - cg_result = nxcg.community.louvain_communities(G) - check(nx_result, cg_result) + if is_nx_30_or_31: + with pytest.raises(ZeroDivisionError): + nx.community.louvain_communities(G) + else: + nx_result = nx.community.louvain_communities(G) + cg_result = nxcg.community.louvain_communities(G) + check(nx_result, cg_result) # Graph with no edges G.add_nodes_from(range(5)) - nx_result = nx.community.louvain_communities(G) - cg_result = nxcg.community.louvain_communities(G) - check(nx_result, cg_result) + if is_nx_30_or_31: + with pytest.raises(ZeroDivisionError): + nx.community.louvain_communities(G) + else: + nx_result = nx.community.louvain_communities(G) + cg_result = nxcg.community.louvain_communities(G) + check(nx_result, cg_result) # Graph with isolated nodes G.add_edge(1, 2) nx_result = nx.community.louvain_communities(G) From a4c6353c9c53db8a27a9eed14db6b545c23c7c79 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Fri, 29 Sep 2023 09:41:59 -0500 Subject: [PATCH 4/5] Add docstrings for custom isolate functions --- python/nx-cugraph/nx_cugraph/algorithms/isolate.py | 6 ++++-- python/nx-cugraph/nx_cugraph/classes/digraph.py | 2 +- python/nx-cugraph/nx_cugraph/classes/graph.py | 2 +- python/nx-cugraph/nx_cugraph/convert.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/python/nx-cugraph/nx_cugraph/algorithms/isolate.py b/python/nx-cugraph/nx_cugraph/algorithms/isolate.py index 17e8f77087e..774627e84f6 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/isolate.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/isolate.py @@ -20,7 +20,7 @@ from nx_cugraph.utils import networkx_algorithm if TYPE_CHECKING: # pragma: no cover - import numpy as np + from nx_cugraph.typing import IndexValue __all__ = ["is_isolate", "isolates", "number_of_isolates"] @@ -37,6 +37,7 @@ def is_isolate(G, n): def _mark_isolates(G) -> cp.ndarray[bool]: + """Return a boolean mask array indicating indices of isolated nodes.""" mark_isolates = cp.ones(len(G), bool) mark_isolates[G.row_indices] = False if G.is_directed(): @@ -44,7 +45,8 @@ def _mark_isolates(G) -> cp.ndarray[bool]: return mark_isolates -def _isolates(G) -> cp.ndarray[np.int64]: +def _isolates(G) -> cp.ndarray[IndexValue]: + """Like isolates, but return an array of indices instead of an iterator of nodes.""" G = _to_graph(G) return cp.nonzero(_mark_isolates(G))[0] diff --git a/python/nx-cugraph/nx_cugraph/classes/digraph.py b/python/nx-cugraph/nx_cugraph/classes/digraph.py index 0aaf88fd793..72a1bff21a9 100644 --- a/python/nx-cugraph/nx_cugraph/classes/digraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/digraph.py @@ -20,7 +20,7 @@ from .graph import Graph -if TYPE_CHECKING: +if TYPE_CHECKING: # pragma: no cover from nx_cugraph.typing import NodeKey __all__ = ["DiGraph"] diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index cd51a02dae8..ded4cc3943f 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -23,7 +23,7 @@ import nx_cugraph as nxcg -if TYPE_CHECKING: +if TYPE_CHECKING: # pragma: no cover from collections.abc import Iterable, Iterator from nx_cugraph.typing import ( diff --git a/python/nx-cugraph/nx_cugraph/convert.py b/python/nx-cugraph/nx_cugraph/convert.py index 9be8cac7877..1240ea71db7 100644 --- a/python/nx-cugraph/nx_cugraph/convert.py +++ b/python/nx-cugraph/nx_cugraph/convert.py @@ -24,7 +24,7 @@ import nx_cugraph as nxcg -if TYPE_CHECKING: +if TYPE_CHECKING: # pragma: no cover from nx_cugraph.typing import AttrKey, Dtype, EdgeValue, NodeValue __all__ = [ From 426fa080e0f39a6af2131ef03317d6b4495a50a4 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 2 Oct 2023 10:39:44 -0500 Subject: [PATCH 5/5] bump flake8-simplify to 0.21.0 --- python/nx-cugraph/lint.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/nx-cugraph/lint.yaml b/python/nx-cugraph/lint.yaml index 6a462a6af79..338ca7b230e 100644 --- a/python/nx-cugraph/lint.yaml +++ b/python/nx-cugraph/lint.yaml @@ -63,7 +63,7 @@ repos: # These versions need updated manually - flake8==6.1.0 - flake8-bugbear==23.9.16 - - flake8-simplify==0.20.0 + - flake8-simplify==0.21.0 - repo: https://github.com/asottile/yesqa rev: v1.5.0 hooks: