Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nx-cugraph: handle louvain with isolated nodes #3897

Merged
merged 10 commits into from
Oct 3, 2023
7 changes: 5 additions & 2 deletions python/nx-cugraph/_nx_cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,23 @@
# BEGIN: functions
"betweenness_centrality",
"edge_betweenness_centrality",
"is_isolate",
"isolates",
"louvain_communities",
"number_of_isolates",
# END: functions
},
"extra_docstrings": {
# BEGIN: extra_docstrings
"betweenness_centrality": "`weight` parameter is not yet supported.",
"edge_betweenness_centrality": "`weight` parameter is not yet supported.",
"louvain_communities": "`threshold` and `seed` parameters are currently ignored.",
"louvain_communities": "`seed` parameter is currently ignored.",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to add something in the docs about max_level being capped at 500?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added (max: 500) to the max_level parameter doc.

# END: extra_docstrings
},
"extra_parameters": {
# BEGIN: extra_parameters
"louvain_communities": {
"max_level : int, optional": "Upper limit of the number of macro-iterations.",
"max_level : int, optional": "Upper limit of the number of macro-iterations (max: 500).",
},
# END: extra_parameters
},
Expand Down
2 changes: 1 addition & 1 deletion python/nx-cugraph/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ repos:
# These versions need to be updated manually
- flake8==6.1.0
- flake8-bugbear==23.9.16
- flake8-simplify==0.20.0
- flake8-simplify==0.21.0
- repo: https://github.com/asottile/yesqa
rev: v1.5.0
hooks:
Expand Down
1 change: 1 addition & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
# limitations under the License.
from . import centrality, community
from .centrality import *
from .isolate import *
28 changes: 23 additions & 5 deletions python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import warnings

import pylibcugraph as plc

Expand All @@ -22,27 +22,38 @@
not_implemented_for,
)

from ..isolate import _isolates

__all__ = ["louvain_communities"]


@not_implemented_for("directed")
@networkx_algorithm(
extra_params={
"max_level : int, optional": "Upper limit of the number of macro-iterations."
"max_level : int, optional": (
"Upper limit of the number of macro-iterations (max: 500)."
)
}
)
def louvain_communities(
G, weight="weight", resolution=1, threshold=0.0000001, seed=None, *, max_level=None
):
"""`threshold` and `seed` parameters are currently ignored."""
"""`seed` parameter is currently ignored."""
# NetworkX allows both directed and undirected, but cugraph only allows undirected.
seed = _seed_to_int(seed) # Unused, but ensure it's valid for future compatibility
G = _to_undirected_graph(G, weight)
if G.row_indices.size == 0:
# TODO: PLC doesn't handle empty graphs gracefully!
return [{key} for key in G._nodeiter_to_iter(range(len(G)))]
if max_level is None:
max_level = sys.maxsize
max_level = 500
elif max_level > 500:
warnings.warn(
f"max_level is set too high (={max_level}), setting it to 500.",
UserWarning,
stacklevel=2,
)
max_level = 500
vertices, clusters, modularity = plc.louvain(
resource_handle=plc.ResourceHandle(),
graph=G._get_plc_graph(),
Expand All @@ -52,7 +63,14 @@ def louvain_communities(
do_expensive_check=False,
)
groups = _groupby(clusters, vertices)
return [set(G._nodearray_to_list(node_ids)) for node_ids in groups.values()]
rv = [set(G._nodearray_to_list(node_ids)) for node_ids in groups.values()]
# TODO: PLC doesn't handle isolated vertices yet, so this is a temporary fix
isolates = _isolates(G)
if isolates.size > 0:
isolates = isolates[isolates > vertices.max()]
if isolates.size > 0:
rv.extend({node} for node in G._nodearray_to_list(isolates))
return rv


@louvain_communities._can_run
Expand Down
63 changes: 63 additions & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/isolate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from typing import TYPE_CHECKING

import cupy as cp

from nx_cugraph.convert import _to_graph
from nx_cugraph.utils import networkx_algorithm

if TYPE_CHECKING: # pragma: no cover
from nx_cugraph.typing import IndexValue

__all__ = ["is_isolate", "isolates", "number_of_isolates"]


@networkx_algorithm
def is_isolate(G, n):
    """Return whether node ``n`` is absent from the edge index arrays of ``G``.

    ``G`` is first converted with ``_to_graph``. The node key ``n`` is mapped
    through ``G.key_to_id`` when that mapping exists; otherwise ``n`` itself
    is used as the index. The result is ``True`` only when the index does not
    occur in ``row_indices`` and, for directed graphs, does not occur in
    ``col_indices`` either.
    """
    graph = _to_graph(G)
    node_id = graph.key_to_id[n] if graph.key_to_id is not None else n
    # Any occurrence among the row indices disqualifies the node outright.
    if (graph.row_indices == node_id).any().tolist():
        return False
    # Column indices are only consulted for directed graphs
    # (presumably undirected graphs store both directions -- verify).
    if graph.is_directed() and (graph.col_indices == node_id).any().tolist():
        return False
    return True


def _mark_isolates(G) -> cp.ndarray[bool]:
    """Build a boolean mask of length ``len(G)`` that is True at isolated indices.

    Every position starts as True and is cleared wherever it appears in
    ``G.row_indices`` and, for directed graphs, in ``G.col_indices``.
    """
    # Collect the index arrays whose entries rule a node out as isolated.
    touched = [G.row_indices]
    if G.is_directed():
        touched.append(G.col_indices)
    mask = cp.ones(len(G), bool)
    for indices in touched:
        mask[indices] = False
    return mask


def _isolates(G) -> cp.ndarray[IndexValue]:
    """Return the indices of isolated nodes as an array.

    Array-returning counterpart of ``isolates``: the input is converted with
    ``_to_graph`` and the positions where the ``_mark_isolates`` mask is True
    are returned.
    """
    graph = _to_graph(G)
    isolated_mask = _mark_isolates(graph)
    # ``nonzero`` returns one array per dimension; the mask is 1-D.
    nonzero_per_axis = cp.nonzero(isolated_mask)
    return nonzero_per_axis[0]


@networkx_algorithm
def isolates(G):
    """Return an iterator over the isolated nodes of ``G``.

    The isolated indices are computed by ``_isolates``, turned into a Python
    list, and passed through ``G._nodeiter_to_iter`` (presumably mapping
    indices back to node keys -- verify against the Graph class).
    """
    graph = _to_graph(G)
    isolated_ids = _isolates(graph).tolist()
    return graph._nodeiter_to_iter(iter(isolated_ids))


@networkx_algorithm
def number_of_isolates(G):
    """Return the number of isolated nodes in ``G``.

    Counts the True entries of the mask produced by ``_mark_isolates``.
    """
    graph = _to_graph(G)
    isolated_mask = _mark_isolates(graph)
    return isolated_mask.sum().tolist()
2 changes: 1 addition & 1 deletion python/nx-cugraph/nx_cugraph/classes/digraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from .graph import Graph

if TYPE_CHECKING:
if TYPE_CHECKING: # pragma: no cover
from nx_cugraph.typing import NodeKey

__all__ = ["DiGraph"]
Expand Down
6 changes: 3 additions & 3 deletions python/nx-cugraph/nx_cugraph/classes/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

import nx_cugraph as nxcg

if TYPE_CHECKING:
if TYPE_CHECKING: # pragma: no cover
from collections.abc import Iterable, Iterator

from nx_cugraph.typing import (
Expand Down Expand Up @@ -245,9 +245,9 @@ def from_dcsc(
def __new__(cls, incoming_graph_data=None, **attr) -> Graph:
if incoming_graph_data is None:
new_graph = cls.from_coo(0, cp.empty(0, np.int32), cp.empty(0, np.int32))
elif incoming_graph_data.__class__ is new_graph.__class__:
elif incoming_graph_data.__class__ is cls:
new_graph = incoming_graph_data.copy()
elif incoming_graph_data.__class__ is new_graph.to_networkx_class():
elif incoming_graph_data.__class__ is cls.to_networkx_class():
new_graph = nxcg.from_networkx(incoming_graph_data, preserve_all_attrs=True)
else:
raise NotImplementedError
Expand Down
2 changes: 1 addition & 1 deletion python/nx-cugraph/nx_cugraph/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

import nx_cugraph as nxcg

if TYPE_CHECKING:
if TYPE_CHECKING: # pragma: no cover
from nx_cugraph.typing import AttrKey, Dtype, EdgeValue, NodeValue

__all__ = [
Expand Down
53 changes: 53 additions & 0 deletions python/nx-cugraph/nx_cugraph/tests/test_community.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import networkx as nx
import pytest

import nx_cugraph as nxcg


def test_louvain_isolated_nodes():
    """Compare nx-cugraph and NetworkX Louvain results on sparse graphs.

    The same graph is grown in four stages: no nodes, five nodes without
    edges, one edge (1, 2), and an additional self-loop (4, 4). At each stage
    the communities from both implementations must match as sets of sets.
    """
    # The variable name suggests `nx.classes.backends` exists only in
    # NetworkX 3.0/3.1; on those versions this test expects NetworkX to
    # raise ZeroDivisionError for graphs without edges.
    is_nx_30_or_31 = hasattr(nx.classes, "backends")

    def check(left, right):
        assert len(left) == len(right)
        assert {frozenset(c) for c in left} == {frozenset(c) for c in right}

    def compare(graph):
        # Run both implementations and require identical communities.
        nx_result = nx.community.louvain_communities(graph)
        cg_result = nxcg.community.louvain_communities(graph)
        check(nx_result, cg_result)

    def compare_edgeless(graph):
        # Edgeless graphs: either expect the NetworkX failure or compare.
        if not is_nx_30_or_31:
            compare(graph)
            return
        with pytest.raises(ZeroDivisionError):
            nx.community.louvain_communities(graph)

    # Empty graph (no nodes)
    G = nx.Graph()
    compare_edgeless(G)
    # Graph with no edges
    G.add_nodes_from(range(5))
    compare_edgeless(G)
    # Graph with isolated nodes
    G.add_edge(1, 2)
    compare(G)
    # Another one: a self-loop on node 4
    G.add_edge(4, 4)
    compare(G)