diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 94612c9..f245318 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -56,7 +56,7 @@ repos:
         # These versions need updated manually
         - flake8==6.0.0
         - flake8-comprehensions==3.10.1
-        - flake8-bugbear==23.1.20
+        - flake8-bugbear==23.2.13
         - flake8-simplify==0.19.3
 - repo: https://github.com/asottile/yesqa
   rev: v1.4.0
diff --git a/graphblas_algorithms/algorithms/centrality/katz.py b/graphblas_algorithms/algorithms/centrality/katz.py
index 0fe35a5..3d21331 100644
--- a/graphblas_algorithms/algorithms/centrality/katz.py
+++ b/graphblas_algorithms/algorithms/centrality/katz.py
@@ -35,9 +35,9 @@ def katz_centrality(
         raise GraphBlasAlgorithmException("beta must have a value for every node")
 
     A = G._A
-    if A.ss.is_iso:
+    if (iso_value := G.get_property("iso_value")) is not None:
         # Fold iso-value into alpha
-        alpha *= A.ss.iso_value.get(1.0)
+        alpha *= iso_value.get(1.0)
         semiring = plus_first[float]
     else:
         semiring = plus_times[float]
diff --git a/graphblas_algorithms/algorithms/core.py b/graphblas_algorithms/algorithms/core.py
index 85ab592..d531c61 100644
--- a/graphblas_algorithms/algorithms/core.py
+++ b/graphblas_algorithms/algorithms/core.py
@@ -28,7 +28,7 @@ def k_truss(G: Graph, k) -> Graph:
         S = C
 
     # Remove isolate nodes
-    indices, _ = C.reduce_rowwise(monoid.any).to_coo()
+    indices, _ = C.reduce_rowwise(monoid.any).to_coo(values=False)
     Ktruss = C[indices, indices].new()
 
     # Convert back to networkx graph with correct node ids
diff --git a/graphblas_algorithms/algorithms/dag.py b/graphblas_algorithms/algorithms/dag.py
index cd1d4c6..3cceeef 100644
--- a/graphblas_algorithms/algorithms/dag.py
+++ b/graphblas_algorithms/algorithms/dag.py
@@ -9,7 +9,7 @@ def descendants(G, source):
     if source not in G._key_to_id:
         raise KeyError(f"The node {source} is not in the graph")
     index = G._key_to_id[source]
-    A = G._A
+    A = G.get_property("offdiag")
     q = Vector.from_coo(index, True, size=A.nrows, name="q")
     rv = q.dup(name="descendants")
     for _ in range(A.nrows):
@@ -25,7 +25,7 @@ def ancestors(G, source):
     if source not in G._key_to_id:
         raise KeyError(f"The node {source} is not in the graph")
     index = G._key_to_id[source]
-    A = G._A
+    A = G.get_property("offdiag")
     q = Vector.from_coo(index, True, size=A.nrows, name="q")
     rv = q.dup(name="descendants")
     for _ in range(A.nrows):
diff --git a/graphblas_algorithms/algorithms/exceptions.py b/graphblas_algorithms/algorithms/exceptions.py
index 883ffeb..f4ef352 100644
--- a/graphblas_algorithms/algorithms/exceptions.py
+++ b/graphblas_algorithms/algorithms/exceptions.py
@@ -12,3 +12,7 @@ class EmptyGraphError(GraphBlasAlgorithmException):
 
 class PointlessConcept(GraphBlasAlgorithmException):
     pass
+
+
+class Unbounded(GraphBlasAlgorithmException):
+    pass
diff --git a/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py b/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py
index ce81ce1..1623819 100644
--- a/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py
+++ b/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py
@@ -49,11 +49,10 @@ def pagerank(
         row_degrees = G.get_property("plus_rowwise+")  # XXX: What about self-edges?
     S = (alpha / row_degrees).new(name="S")
-    if A.ss.is_iso:
+    if (iso_value := G.get_property("iso_value")) is not None:
         # Fold iso-value of A into S
         # This lets us use the plus_first semiring, which is faster
-        iso_value = A.ss.iso_value
-        if iso_value != 1:
+        if iso_value.get(1) != 1:
             S *= iso_value
         semiring = plus_first[float]
     else:
         semiring = plus_times[float]
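A note on the iso-value folding in the two hunks above (katz.py and pagerank_alg.py): when every stored value of `A` equals one constant `c`, that constant can be folded into `alpha` (or into `S`), after which the cheaper `plus_first` semiring gives the same result as `plus_times`. Below is a minimal dense NumPy sketch of the identity being exploited; the names are illustrative only and not part of the patch:

```python
import numpy as np

# If A is iso-valued, then A == c * pattern for a single constant c.
# plus_times:  (v @ A)[j] = sum_i v[i] * A[i, j] = sum_i v[i] * c
# plus_first:  fold c into v first; only the *presence* of edges matters.
c = 2.5
pattern = np.array([[1, 0, 1], [0, 1, 0], [1, 1, 0]], dtype=float)  # 1 = stored edge
A = c * pattern
v = np.array([1.0, 2.0, 3.0])

plus_times = v @ A              # multiply by the actual edge values
plus_first = (c * v) @ pattern  # same result with the constant folded out
assert np.allclose(plus_times, plus_first)
```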
diff --git a/graphblas_algorithms/algorithms/shortest_paths/__init__.py b/graphblas_algorithms/algorithms/shortest_paths/__init__.py
index 60613e6..9fc57fb 100644
--- a/graphblas_algorithms/algorithms/shortest_paths/__init__.py
+++ b/graphblas_algorithms/algorithms/shortest_paths/__init__.py
@@ -1,2 +1,3 @@
 from .dense import *
 from .generic import *
+from .weighted import *
diff --git a/graphblas_algorithms/algorithms/shortest_paths/generic.py b/graphblas_algorithms/algorithms/shortest_paths/generic.py
index 864ac2a..f91c9cf 100644
--- a/graphblas_algorithms/algorithms/shortest_paths/generic.py
+++ b/graphblas_algorithms/algorithms/shortest_paths/generic.py
@@ -10,7 +10,7 @@ def has_path(G, source, target):
     dst = G._key_to_id[target]
     if src == dst:
         return True
-    A = G._A
+    A = G.get_property("offdiag")
     q_src = Vector.from_coo(src, True, size=A.nrows, name="q_src")
     seen_src = q_src.dup(name="seen_src")
     q_dst = Vector.from_coo(dst, True, size=A.nrows, name="q_dst")
diff --git a/graphblas_algorithms/algorithms/shortest_paths/weighted.py b/graphblas_algorithms/algorithms/shortest_paths/weighted.py
new file mode 100644
index 0000000..f073180
--- /dev/null
+++ b/graphblas_algorithms/algorithms/shortest_paths/weighted.py
@@ -0,0 +1,207 @@
+import numpy as np
+from graphblas import Matrix, Vector, binary, monoid, replace, select, unary
+from graphblas.semiring import any_pair, min_plus
+
+from ..exceptions import Unbounded
+
+__all__ = [
+    "single_source_bellman_ford_path_length",
+    "bellman_ford_path_lengths",
+]
+
+
+def single_source_bellman_ford_path_length(G, source):
+    # No need for `is_weighted=` keyword, b/c this is assumed to be weighted (I think)
+    index = G._key_to_id[source]
+    if G.get_property("is_iso"):
+        # If the edges are iso-valued (and positive), then we can simply do level BFS
+        is_negative, iso_value = G.get_properties("has_negative_edges+ iso_value")
+        if not is_negative:
+            d = _bfs_level(G, source, dtype=iso_value.dtype)
+            if iso_value != 1:
+                d *= iso_value
+            return d
+        # It's difficult to detect negative cycles with BFS
+        if G._A[index, index].get() is not None:
+            raise Unbounded("Negative cycle detected.")
+        if not G.is_directed() and G._A[index, :].nvals > 0:
+            # For undirected graphs, any negative edge is a cycle
+            raise Unbounded("Negative cycle detected.")
+
+    # Use `offdiag` instead of `A`, b/c self-loops don't contribute to the result,
+    # and negative self-loops are easy negative cycles to avoid.
+    # We check if we hit a self-loop negative cycle at the end.
+    A, has_negative_diagonal = G.get_properties("offdiag has_negative_diagonal")
+    if A.dtype == bool:
+        # Should we upcast e.g. INT8 to INT64 as well?
+        dtype = int
+    else:
+        dtype = A.dtype
+    n = A.nrows
+    d = Vector(dtype, n, name="single_source_bellman_ford_path_length")
+    d[index] = 0
+    cur = d.dup(name="cur")
+    mask = Vector(bool, n, name="mask")
+    one = unary.one[bool]
+    for _i in range(n - 1):
+        # This is a slightly modified Bellman-Ford algorithm.
+        # `cur` is the current frontier of values that improved in the previous iteration.
+        # This means that in this iteration we drop values from `cur` that are not better.
+        cur << min_plus(cur @ A)
+
+        # Mask is True where cur not in d or cur < d
+        mask << one(cur)
+        mask(binary.second) << binary.lt(cur & d)
+
+        # Drop values from `cur` that didn't improve
+        cur(mask.V, replace) << cur
+        if cur.nvals == 0:
+            break
+        # Update `d` with values that improved
+        d(cur.S) << cur
+    else:
+        # Check for negative cycle when for loop completes without breaking
+        cur << min_plus(cur @ A)
+        mask << binary.lt(cur & d)
+        if mask.reduce(monoid.lor):
+            raise Unbounded("Negative cycle detected.")
+
+    if has_negative_diagonal:
+        # We removed diagonal entries above, so check if we visited one with a negative weight
+        diag = G.get_property("diag")
+        cur << select.valuelt(diag, 0)
+        if any_pair(d @ cur):
+            raise Unbounded("Negative cycle detected.")
+    return d
+
+
+def bellman_ford_path_lengths(G, nodes=None, *, expand_output=False):
+    """
+
+    Parameters
+    ----------
+    expand_output : bool, default False
+        When False, the returned Matrix has one row per node in nodes.
+        When True, the returned Matrix has the same shape as the input Matrix.
+    """
+    # Same algorithm as in `single_source_bellman_ford_path_length`, but with
+    # `Cur` as a Matrix with each row corresponding to a source node.
+    if G.get_property("is_iso"):
+        is_negative, iso_value = G.get_properties("has_negative_edges+ iso_value")
+        if not is_negative:
+            D = _bfs_levels(G, nodes, dtype=iso_value.dtype)
+            if iso_value != 1:
+                D *= iso_value
+            if nodes is not None and expand_output and D.ncols != D.nrows:
+                ids = G.list_to_ids(nodes)
+                rv = Matrix(D.dtype, D.ncols, D.ncols, name=D.name)
+                rv[ids, :] = D
+                return rv
+            return D
+        if not G.is_directed():
+            # For undirected graphs, any negative edge is a cycle
+            if nodes is not None:
+                ids = G.list_to_ids(nodes)
+                if G._A[ids, :].nvals > 0:
+                    raise Unbounded("Negative cycle detected.")
+            elif G._A.nvals > 0:
+                raise Unbounded("Negative cycle detected.")
+
+    A, has_negative_diagonal = G.get_properties("offdiag has_negative_diagonal")
+    if A.dtype == bool:
+        dtype = int
+    else:
+        dtype = A.dtype
+    n = A.nrows
+    if nodes is None:
+        # TODO: `D = Vector.from_scalar(0, n, dtype).diag()`
+        D = Vector(dtype, n, name="bellman_ford_path_lengths_vector")
+        D << 0
+        D = D.diag(name="bellman_ford_path_lengths")
+    else:
+        ids = G.list_to_ids(nodes)
+        D = Matrix.from_coo(
+            np.arange(len(ids), dtype=np.uint64),
+            ids,
+            0,
+            dtype,
+            nrows=len(ids),
+            ncols=n,
+            name="bellman_ford_path_lengths",
+        )
+    Cur = D.dup(name="Cur")
+    Mask = Matrix(bool, D.nrows, D.ncols, name="Mask")
+    one = unary.one[bool]
+    for _i in range(n - 1):
+        Cur << min_plus(Cur @ A)
+        Mask << one(Cur)
+        Mask(binary.second) << binary.lt(Cur & D)
+        Cur(Mask.V, replace) << Cur
+        if Cur.nvals == 0:
+            break
+        D(Cur.S) << Cur
+    else:
+        Cur << min_plus(Cur @ A)
+        Mask << binary.lt(Cur & D)
+        if Mask.reduce_scalar(monoid.lor):
+            raise Unbounded("Negative cycle detected.")
+
+    if has_negative_diagonal:
+        diag = G.get_property("diag")
+        cur = select.valuelt(diag, 0)
+        if any_pair(D @ cur).nvals > 0:
+            raise Unbounded("Negative cycle detected.")
+    if nodes is not None and expand_output and D.ncols != D.nrows:
+        rv = Matrix(D.dtype, n, n, name=D.name)
+        rv[ids, :] = D
+        return rv
+    return D
+
+
+def _bfs_level(G, source, *, dtype=int):
+    if dtype == bool:
+        dtype = int
+    index = G._key_to_id[source]
+    A = G.get_property("offdiag")
+    n = A.nrows
+    v = Vector(dtype, n, name="bfs_level")
+    q = Vector(bool, n, name="q")
+    v[index] = 0
+    q[index] = True
+    any_pair_bool = any_pair[bool]
+    for i in range(1, n):
+        q(~v.S, replace) << any_pair_bool(q @ A)
+        if q.nvals == 0:
+            break
+        v(q.S) << i
+    return v
+
+
+def _bfs_levels(G, nodes=None, *, dtype=int):
+    if dtype == bool:
+        dtype = int
+    A = G.get_property("offdiag")
+    n = A.nrows
+    if nodes is None:
+        # TODO: `D = Vector.from_scalar(0, n, dtype).diag()`
+        D = Vector(dtype, n, name="bfs_levels_vector")
+        D << 0
+        D = D.diag(name="bfs_levels")
+    else:
+        ids = G.list_to_ids(nodes)
+        D = Matrix.from_coo(
+            np.arange(len(ids), dtype=np.uint64),
+            ids,
+            0,
+            dtype,
+            nrows=len(ids),
+            ncols=n,
+            name="bfs_levels",
+        )
+    Q = Matrix(bool, D.nrows, D.ncols, name="Q")
+    Q << unary.one[bool](D)
+    any_pair_bool = any_pair[bool]
+    for i in range(1, n):
+        Q(~D.S, replace) << any_pair_bool(Q @ A)
+        if Q.nvals == 0:
+            break
+        D(Q.S) << i
+    return D
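The loop in `single_source_bellman_ford_path_length` above is Bellman-Ford with a shrinking frontier: `cur` holds only the distances that improved in the previous pass, and the mask drops entries that failed to improve. Below is a dense NumPy analogue of the same update rule, useful for following the masked GraphBLAS expressions; the helper name and the inf-padded dense adjacency convention are hypothetical, not part of this patch:

```python
import numpy as np

def bellman_ford_frontier(A, source):
    # A[i, j] = weight of edge i -> j; np.inf marks a missing edge.
    n = A.shape[0]
    d = np.full(n, np.inf)
    d[source] = 0.0
    cur = d.copy()  # the frontier: only the source at first
    for _ in range(n - 1):
        # min-plus matrix-vector product, like `min_plus(cur @ A)`
        cand = (cur[:, None] + A).min(axis=0)
        # like `binary.lt(cur & d)`; inf in d covers "not present in d"
        improved = cand < d
        if not improved.any():
            break  # like `if cur.nvals == 0: break`
        d = np.where(improved, cand, d)         # `d(cur.S) << cur`
        cur = np.where(improved, cand, np.inf)  # `cur(mask.V, replace) << cur`
    else:
        # n - 1 passes without convergence: one more improvement means a cycle
        if ((cur[:, None] + A).min(axis=0) < d).any():
            raise ValueError("Negative cycle detected.")
    return d
```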
diff --git a/graphblas_algorithms/algorithms/simple_paths.py b/graphblas_algorithms/algorithms/simple_paths.py
index 646787c..44ba66e 100644
--- a/graphblas_algorithms/algorithms/simple_paths.py
+++ b/graphblas_algorithms/algorithms/simple_paths.py
@@ -8,7 +8,7 @@ def is_simple_path(G, nodes):
         return False
     if len(nodes) == 1:
         return nodes[0] in G
-    A = G._A
+    A = G._A  # offdiag instead?
     if A.nvals < len(nodes) - 1:
         return False
     key_to_id = G._key_to_id
diff --git a/graphblas_algorithms/classes/_caching.py b/graphblas_algorithms/classes/_caching.py
index 24de3ec..426ba61 100644
--- a/graphblas_algorithms/classes/_caching.py
+++ b/graphblas_algorithms/classes/_caching.py
@@ -1,6 +1,8 @@
-from graphblas import op
+from graphblas import Scalar, dtypes, op
 from graphblas.core import operator
 
+NONNEGATIVE_DTYPES = {dtypes.BOOL, dtypes.UINT8, dtypes.UINT16, dtypes.UINT32, dtypes.UINT64}
+
 
 def get_reduce_to_vector(key, opname, methodname):
     op_ = op.from_string(opname)
@@ -142,7 +144,7 @@ def get_reduction(G, mask=None):
                 cache[f"{keybase}+"] = cache[key]
             return cache[key]
 
-    else:
+    elif key[-1] == "+":
 
         def get_reduction(G, mask=None):
             A = G._A
@@ -170,4 +172,18 @@ def get_reduction(G, mask=None):
             cache[f"{keybase}-"] = cache[key]
             return cache[key]
 
+    elif key.endswith("_diagonal"):
+
+        def get_reduction(G, mask=None):
+            A = G._A
+            cache = G._cache
+            if key not in cache:
+                if not G.get_property("has_self_edges"):
+                    cache[key] = Scalar(op_[A.dtype].return_type, name=key)
+                else:
+                    cache[key] = G.get_property("diag").reduce(op_).new(name=key)
+            return cache[key]
+
+    else:  # pragma: no cover (sanity)
+        raise RuntimeError()
     return get_reduction
diff --git a/graphblas_algorithms/classes/_utils.py b/graphblas_algorithms/classes/_utils.py
index 629316a..8a8e6c9 100644
--- a/graphblas_algorithms/classes/_utils.py
+++ b/graphblas_algorithms/classes/_utils.py
@@ -65,14 +65,14 @@ def dict_to_vector(self, d, *, size=None, dtype=None, name=None):
     return Vector.from_coo(indices, values, size=size, dtype=dtype, name=name)
 
 
-def list_to_vector(self, nodes, dtype=bool, *, size=None, name=None):
+def list_to_vector(self, nodes, dtype=None, *, values=True, size=None, name=None):
     if nodes is None:
         return None
     if size is None:
         size = len(self)
     key_to_id = self._key_to_id
     index = [key_to_id[key] for key in nodes]
-    return Vector.from_coo(index, True, size=size, dtype=dtype, name=name)
+    return Vector.from_coo(index, values, size=size, dtype=dtype, name=name)
 
 
 def list_to_mask(self, nodes, *, size=None, name="mask"):
diff --git a/graphblas_algorithms/classes/digraph.py b/graphblas_algorithms/classes/digraph.py
index 0984dcd..e0b5751 100644
--- a/graphblas_algorithms/classes/digraph.py
+++ b/graphblas_algorithms/classes/digraph.py
@@ -1,18 +1,23 @@
 from collections import defaultdict
 
 import graphblas as gb
-from graphblas import Matrix, Vector, binary, replace, select, unary
+from graphblas import Matrix, binary, replace, select, unary
 
 import graphblas_algorithms as ga
 
 from . import _utils
 from ._caching import get_reduce_to_scalar, get_reduce_to_vector
-from .graph import Graph
-
-
-def get_A(G, mask=None):
-    """A"""
-    return G._A
+from .graph import (
+    Graph,
+    get_A,
+    get_diag,
+    get_iso_value,
+    get_offdiag,
+    has_negative_diagonal,
+    has_negative_edgesm,
+    has_negative_edgesp,
+    is_iso,
+)
 
 
 def get_AT(G, mask=None):
@@ -24,22 +29,6 @@ def get_AT(G, mask=None):
     return cache["AT"]
 
 
-def get_offdiag(G, mask=None):
-    """select.offdiag(A)"""
-    A = G._A
-    cache = G._cache
-    if "offdiag" not in cache:
-        if cache.get("has_self_edges") is False:
-            cache["offdiag"] = A
-        else:
-            cache["offdiag"] = select.offdiag(A).new(name="offdiag")
-        if "has_self_edges" not in cache:
-            cache["has_self_edges"] = A.nvals > cache["offdiag"].nvals
-        if not cache["has_self_edges"]:
-            cache["offdiag"] = A
-    return cache["offdiag"]
-
-
 def get_Up(G, mask=None):
     """select.triu(A)"""
     A = G._A
@@ -126,26 +115,6 @@ def get_Lm(G, mask=None):
     return cache["L-"]
 
 
-def get_diag(G, mask=None):
-    """select.diag(A)"""
-    A = G._A
-    cache = G._cache
-    if "diag" not in cache:
-        if cache.get("has_self_edges") is False:
-            cache["diag"] = Vector(A.dtype, size=A.nrows, name="diag")
-        elif "U+" in cache:
-            cache["diag"] = cache["U+"].diag(name="diag")
-        elif "L+" in cache:
-            cache["diag"] = cache["L+"].diag(name="diag")
-        else:
-            cache["diag"] = A.diag(name="diag")
-        if "has_self_edges" not in cache:
-            cache["has_self_edges"] = cache["diag"].nvals > 0
-    if mask is not None:
-        return cache["diag"].dup(mask=mask)
-    return cache["diag"]
-
-
 def get_recip_degreesp(G, mask=None):
     """pair(A & A.T).reduce_rowwise()"""
     A = G._A
@@ -388,6 +357,8 @@ def has_self_edges(G, mask=None):
             cache["has_self_edges"] = cache["U-"].nvals < cache["U+"].nvals
         else:
            cache["has_self_edges"] = cache["U-"].nvals + cache["L-"].nvals < A.nvals
+    elif cache.get("has_negative_diagonal") is True:
+        cache["has_self_edges"] = True
     elif "total_recip-" in cache and "total_recip+" in cache:
         cache["has_self_edges"] = cache["total_recip+"] > cache["total_recip-"]
     elif "row_degrees-" in cache and "row_degrees+" in cache:
@@ -473,9 +444,14 @@ def __missing__(self, key):
                 get_reduction = get_reduce_to_scalar(key, opname)
             else:
                 get_reduction = get_reduce_to_vector(key, opname, methodname)
-            self[key] = get_reduction
-            return get_reduction
-        raise KeyError(key)
+        elif key.endswith("_diagonal"):
+            # e.g., min_diagonal
+            opname = key[: -len("_diagonal")]
+            get_reduction = get_reduce_to_scalar(key, opname)
+        else:
+            raise KeyError(key)
+        self[key] = get_reduction
+        return get_reduction
 
 
 class DiGraph(Graph):
@@ -507,6 +483,12 @@ class DiGraph(Graph):
            "total_degrees-",
            "total_recip+",  # scalar; I don't like this name
            "total_recip-",  # scalar; I don't like this name
+           "min_diagonal",
+           "min_element+",
+           "min_element-",
+           "has_negative_diagonal",
+           "has_negative_edges-",
+           "has_negative_edges+",
            "has_self_edges",
         ]
     )
@@ -528,6 +510,11 @@ class DiGraph(Graph):
            "total_degrees-": get_total_degreesm,
            "total_recip+": get_total_recipp,
            "total_recip-": get_total_recipm,
+           "is_iso": is_iso,
+           "iso_value": get_iso_value,
+           "has_negative_diagonal": has_negative_diagonal,
+           "has_negative_edges-": has_negative_edgesm,
+           "has_negative_edges+": has_negative_edgesp,
            "has_self_edges": has_self_edges,
         }
     )
@@ -568,6 +555,7 @@ def __init__(self, incoming_graph_data=None, *, key_to_id=None, **attr):
     list_to_vector = _utils.list_to_vector
     list_to_mask = _utils.list_to_mask
     list_to_ids = _utils.list_to_ids
+    list_to_keys = _utils.list_to_keys
     matrix_to_dicts = _utils.matrix_to_dicts
     matrix_to_nodenodemap = _utils.matrix_to_nodenodemap
     matrix_to_vectornodemap = _utils.matrix_to_vectornodemap
diff --git a/graphblas_algorithms/classes/graph.py b/graphblas_algorithms/classes/graph.py
index 65e4cea..0688518 100644
--- a/graphblas_algorithms/classes/graph.py
+++ b/graphblas_algorithms/classes/graph.py
@@ -6,7 +6,7 @@ import graphblas_algorithms as ga
 import graphblas_algorithms as ga
 
 from . import _utils
-from ._caching import get_reduce_to_scalar, get_reduce_to_vector
+from ._caching import NONNEGATIVE_DTYPES, get_reduce_to_scalar, get_reduce_to_vector
 
 
 def get_A(G, mask=None):
@@ -126,9 +126,72 @@ def get_diag(G, mask=None):
             cache["diag"] = A.diag(name="diag")
         if "has_self_edges" not in cache:
             cache["has_self_edges"] = cache["diag"].nvals > 0
+    if mask is not None:
+        return cache["diag"].dup(mask=mask)
     return cache["diag"]
 
 
+def has_negative_diagonal(G, mask=None):
+    A = G._A
+    cache = G._cache
+    if "has_negative_diagonal" not in cache:
+        if A.dtype in NONNEGATIVE_DTYPES or A.dtype._is_udt or cache.get("has_self_edges") is False:
+            cache["has_negative_diagonal"] = False
+        elif (
+            cache.get("has_negative_edges+") is True
+            and cache.get("has_negative_edges-") is False
+            or cache.get("has_negative_edges+") is True
+            and cache.get("min_element-", 0) >= 0
+            or cache.get("min_element+", 0) < 0
+            and cache.get("min_element+", 0) < cache.get("min_element-", 0)
+        ):
+            cache["has_negative_diagonal"] = True
+        else:
+            cache["has_negative_diagonal"] = G.get_property("min_diagonal").get(0) < 0
+    return cache["has_negative_diagonal"]
+
+
+def has_negative_edgesp(G, mask=None):
+    A = G._A
+    cache = G._cache
+    if "has_negative_edges+" not in cache:
+        if A.dtype in NONNEGATIVE_DTYPES or A.dtype._is_udt:
+            cache["has_negative_edges+"] = False
+        elif (
+            cache.get("has_negative_edges-")
+            or cache.get("min_element+", 0) < 0
+            or cache.get("min_element-", 0) < 0
+            or cache.get("min_diagonal", 0) < 0
+            or cache.get("has_negative_diagonal")
+        ):
+            cache["has_negative_edges+"] = True
+        elif cache.get("iso_value") is not None:
+            cache["has_negative_edges+"] = cache["iso_value"].get(0) < 0
+        elif cache.get("has_negative_edges-") is False:
+            cache["has_negative_edges+"] = G.get_property("min_diagonal").get(0) < 0
+        else:
+            cache["has_negative_edges+"] = G.get_property("min_element+").get(0) < 0
+    return cache["has_negative_edges+"]
+
+
+def has_negative_edgesm(G, mask=None):
+    A = G._A
+    cache = G._cache
+    if "has_negative_edges-" not in cache:
+        if A.dtype in NONNEGATIVE_DTYPES or A.dtype._is_udt:
+            cache["has_negative_edges-"] = False
+        elif (
+            cache.get("has_negative_edges+")
+            and cache.get("has_self_edges") is False
+            or cache.get("has_negative_edges+")
+            and cache.get("has_negative_diagonal") is False
+        ):
+            cache["has_negative_edges-"] = True
+        else:
+            cache["has_negative_edges-"] = G.get_property("min_element-").get(0) < 0
+    return cache["has_negative_edges-"]
+
+
 def has_self_edges(G, mask=None):
     """A.diag().nvals > 0"""
     A = G._A
@@ -144,11 +207,48 @@ def has_self_edges(G, mask=None):
         cache["has_self_edges"] = 2 * cache["U-"].nvals < A.nvals
     elif "offdiag" in cache:
         cache["has_self_edges"] = A.nvals > cache["offdiag"].nvals
+    elif cache.get("has_negative_diagonal") is True:
+        cache["has_self_edges"] = True
     else:
         G.get_property("diag")
     return cache["has_self_edges"]
 
 
+def is_iso(G, mask=None):
+    A = G._A
+    cache = G._cache
+    if "is_iso" not in cache:
+        if "iso_value" in cache:
+            cache["is_iso"] = cache["iso_value"] is not None
+        else:
+            # SuiteSparse:GraphBLAS. `A` may still be iso-valued even if `A.ss.is_iso` is False.
+            # Should we check this or rely on `A.ss.is_iso` b/c it's fast and should usually work?
+            cache["is_iso"] = A.ss.is_iso
+    return cache["is_iso"]
+
+
+def get_iso_value(G, mask=None):
+    A = G._A
+    cache = G._cache
+    if "iso_value" not in cache:
+        if "is_iso" in cache:
+            if cache["is_iso"]:
+                # SuiteSparse:GraphBLAS
+                cache["iso_value"] = A.ss.iso_value
+            else:
+                cache["iso_value"] = None
+        else:
+            # min_val, max_val = G.get_properties('min_element+ max_element+')
+            # SuiteSparse:GraphBLAS
+            if A.ss.is_iso:
+                cache["iso_value"] = A.ss.iso_value
+                cache["is_iso"] = True
+            else:
+                cache["iso_value"] = None
+                cache["is_iso"] = False
+    return cache["iso_value"]
+
+
 def to_undirected_graph(G, weight=None, dtype=None):
     # We should do some sanity checks here to ensure we're returning a valid undirected graph
     if isinstance(G, Graph):
@@ -190,9 +290,14 @@ def __missing__(self, key):
             else:
                 get_reduction = get_reduce_to_vector(key, opname, methodname)
                 self[f"{opname}_columnwise{key[-1]}"] = get_reduction
-            self[key] = get_reduction
-            return get_reduction
-        raise KeyError(key)
+        elif key.endswith("_diagonal"):
+            # e.g., min_diagonal
+            opname = key[: -len("_diagonal")]
+            get_reduction = get_reduce_to_scalar(key, opname)
+        else:
+            raise KeyError(key)
+        self[key] = get_reduction
+        return get_reduction
 
 
 class Graph:
@@ -216,6 +321,12 @@ class Graph:
            "diag",
            "count_rowwise+",
            "count_rowwise-",
+           "min_diagonal",
+           "min_element+",
+           "min_element-",
+           "has_negative_diagonal",
+           "has_negative_edges-",
+           "has_negative_edges+",
            "has_self_edges",
         ]
     )
@@ -231,6 +342,11 @@ class Graph:
            "U-": get_Um,
            "L-": get_Lm,
            "diag": get_diag,
+           "is_iso": is_iso,
+           "iso_value": get_iso_value,
+           "has_negative_diagonal": has_negative_diagonal,
+           "has_negative_edges-": has_negative_edgesm,
+           "has_negative_edges+": has_negative_edgesp,
            "has_self_edges": has_self_edges,
         }
     )
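For readers new to the cache-key convention used by these properties: a trailing "+" means the diagonal (self-edges) is included and a trailing "-" means it is excluded, so `has_negative_edges-` can be False while `has_negative_edges+` is True. A small illustrative sketch, assuming the existing `from_networkx` constructor and `get_property` API; the toy graph is hypothetical:

```python
import networkx as nx
import graphblas_algorithms as ga

# Toy graph: one ordinary positive edge plus one negative self-loop.
nxg = nx.DiGraph()
nxg.add_weighted_edges_from([(0, 1, 2.0), (1, 1, -3.0)])
G = ga.DiGraph.from_networkx(nxg, weight="weight")

assert G.get_property("has_negative_edges+")      # "+" includes the -3.0 self-loop
assert not G.get_property("has_negative_edges-")  # "-" sees only off-diagonal edges
assert G.get_property("has_negative_diagonal")
```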
G._cache + if "is_iso" not in cache: + if "iso_value" in cache: + cache["is_iso"] = cache["iso_value"] is not None + else: + # SuiteSparse:GraphBLAS. `A` may still be iso-valued even if `A.ss.is_iso` is False. + # Should we check this or rely on `A.ss.is_iso` b/c it's fast and should usually work? + cache["is_iso"] = A.ss.is_iso + return cache["is_iso"] + + +def get_iso_value(G, mask=None): + A = G._A + cache = G._cache + if "iso_value" not in cache: + if "is_iso" in cache: + if cache["is_iso"]: + # SuiteSparse:GraphBLAS + cache["iso_value"] = A.ss.iso_value + else: + cache["iso_value"] + else: + # min_val, max_val = G.get_properties('min_element+ max_element+') + # SuiteSparse:GraphBLAS + if A.ss.is_iso: + cache["iso_value"] = A.ss.iso_value + cache["is_iso"] = True + else: + cache["iso_value"] = None + cache["is_iso"] = False + return cache["iso_value"] + + def to_undirected_graph(G, weight=None, dtype=None): # We should do some sanity checks here to ensure we're returning a valid undirected graph if isinstance(G, Graph): @@ -190,9 +290,14 @@ def __missing__(self, key): else: get_reduction = get_reduce_to_vector(key, opname, methodname) self[f"{opname}_columnwise{key[-1]}"] = get_reduction - self[key] = get_reduction - return get_reduction - raise KeyError(key) + elif key.endswith("_diagonal"): + # e.g., min_diagonal + opname = key[: -len("_diagonal")] + get_reduction = get_reduce_to_scalar(key, opname) + else: + raise KeyError(key) + self[key] = get_reduction + return get_reduction class Graph: @@ -216,6 +321,12 @@ class Graph: "diag", "count_rowwise+", "count_rowwise-", + "min_diagonal", + "min_element+", + "min_element-", + "has_negative_diagonal", + "has_negative_edges-", + "has_negative_edges+", "has_self_edges", ] ) @@ -231,6 +342,11 @@ class Graph: "U-": get_Um, "L-": get_Lm, "diag": get_diag, + "is_iso": is_iso, + "iso_value": get_iso_value, + "has_negative_diagonal": has_negative_diagonal, + "has_negative_edges-": has_negative_edgesm, + "has_negative_edges+": has_negative_edgesp, "has_self_edges": has_self_edges, } ) diff --git a/graphblas_algorithms/interface.py b/graphblas_algorithms/interface.py index e88cc4f..1a142c3 100644 --- a/graphblas_algorithms/interface.py +++ b/graphblas_algorithms/interface.py @@ -60,6 +60,12 @@ class Dispatcher: nxapi.shortest_paths.dense.floyd_warshall_predecessor_and_distance ) has_path = nxapi.shortest_paths.generic.has_path + all_pairs_bellman_ford_path_length = ( + nxapi.shortest_paths.weighted.all_pairs_bellman_ford_path_length + ) + single_source_bellman_ford_path_length = ( + nxapi.shortest_paths.weighted.single_source_bellman_ford_path_length + ) # Simple Paths is_simple_path = nxapi.simple_paths.is_simple_path # S Metric @@ -103,13 +109,23 @@ def on_start_tests(items): import pytest except ImportError: # pragma: no cover (import) return + + def key(testpath): + filename, path = testpath.split(":") + classname, testname = path.split(".") + return (testname, frozenset({classname, filename})) + + # Reasons to skip tests multi_attributed = "unable to handle multi-attributed graphs" multidigraph = "unable to handle MultiDiGraph" - freeze = frozenset + multigraph = "unable to handle MultiGraph" + + # Which tests to skip skip = { - ("test_attributes", freeze({"TestBoruvka", "test_mst.py"})): multi_attributed, - ("test_weight_attribute", freeze({"TestBoruvka", "test_mst.py"})): multi_attributed, - ("test_zero_weight", freeze({"TestFloyd", "test_dense.py"})): multidigraph, + key("test_mst.py:TestBoruvka.test_attributes"): multi_attributed, + 
key("test_mst.py:TestBoruvka.test_weight_attribute"): multi_attributed, + key("test_dense.py:TestFloyd.test_zero_weight"): multidigraph, + key("test_weighted.py:TestBellmanFordAndGoldbergRadzik.test_multigraph"): multigraph, } for item in items: kset = set(item.keywords) diff --git a/graphblas_algorithms/nxapi/cluster.py b/graphblas_algorithms/nxapi/cluster.py index e252963..457f5e6 100644 --- a/graphblas_algorithms/nxapi/cluster.py +++ b/graphblas_algorithms/nxapi/cluster.py @@ -94,7 +94,7 @@ def _split(L, k): # TODO: should this move into algorithms? def _square_clustering_split(G, node_ids=None, *, nsplits): if node_ids is None: - node_ids = G._A.reduce_rowwise(monoid.any).to_coo()[0] + node_ids, _ = G._A.reduce_rowwise(monoid.any).to_coo(values=False) result = None for chunk_ids in _split(node_ids, nsplits): res = algorithms.square_clustering(G, chunk_ids) @@ -105,27 +105,37 @@ def _square_clustering_split(G, node_ids=None, *, nsplits): return result -def square_clustering(G, nodes=None, *, nsplits=None): +def square_clustering(G, nodes=None, *, nsplits="auto"): + # `nsplits` is used to split the computation into chunks. + # square_clustering computes `A @ A`, which can get very large, even dense. + # The default `nsplits` is to choose the number so that `Asubset @ A` + # will be about 256 MB if dense. G = to_undirected_graph(G) if len(G) == 0: return {} - elif nodes is None: + if nsplits == "auto": + # TODO: make a utility function for this that can be reused + # Also, should we use `chunksize` instead of `nsplits`? + targetsize = 256 * 1024 * 1024 # 256 MB + nsplits = len(G) ** 2 * G._A.dtype.np_type.itemsize // targetsize + if nsplits <= 1: + nsplits = None + if nodes is None: # Should we use this one for subsets of nodes as well? if nsplits is None: result = algorithms.square_clustering(G) else: result = _square_clustering_split(G, nsplits=nsplits) return G.vector_to_nodemap(result, fill_value=0) - elif nodes in G: + if nodes in G: idx = G._key_to_id[nodes] return algorithms.single_square_clustering(G, idx) + ids = G.list_to_ids(nodes) + if nsplits is None: + result = algorithms.square_clustering(G, ids) else: - ids = G.list_to_ids(nodes) - if nsplits is None: - result = algorithms.square_clustering(G, ids) - else: - result = _square_clustering_split(G, ids, nsplits=nsplits) - return G.vector_to_nodemap(result) + result = _square_clustering_split(G, ids, nsplits=nsplits) + return G.vector_to_nodemap(result) @not_implemented_for("directed") diff --git a/graphblas_algorithms/nxapi/exception.py b/graphblas_algorithms/nxapi/exception.py index 0c5b666..2630384 100644 --- a/graphblas_algorithms/nxapi/exception.py +++ b/graphblas_algorithms/nxapi/exception.py @@ -8,6 +8,9 @@ class NetworkXError(Exception): class NetworkXPointlessConcept(Exception): pass + class NetworkXUnbounded(Exception): + pass + class NodeNotFound(Exception): pass @@ -18,6 +21,7 @@ class PowerIterationFailedConvergence(Exception): from networkx import ( NetworkXError, NetworkXPointlessConcept, + NetworkXUnbounded, NodeNotFound, PowerIterationFailedConvergence, ) diff --git a/graphblas_algorithms/nxapi/shortest_paths/__init__.py b/graphblas_algorithms/nxapi/shortest_paths/__init__.py index 60613e6..9fc57fb 100644 --- a/graphblas_algorithms/nxapi/shortest_paths/__init__.py +++ b/graphblas_algorithms/nxapi/shortest_paths/__init__.py @@ -1,2 +1,3 @@ from .dense import * from .generic import * +from .weighted import * diff --git a/graphblas_algorithms/nxapi/shortest_paths/weighted.py 
diff --git a/graphblas_algorithms/nxapi/shortest_paths/weighted.py b/graphblas_algorithms/nxapi/shortest_paths/weighted.py
new file mode 100644
index 0000000..1c5e759
--- /dev/null
+++ b/graphblas_algorithms/nxapi/shortest_paths/weighted.py
@@ -0,0 +1,62 @@
+from graphblas_algorithms import algorithms
+from graphblas_algorithms.classes.digraph import to_graph
+
+from ..exception import NetworkXUnbounded, NodeNotFound
+
+__all__ = [
+    "all_pairs_bellman_ford_path_length",
+    "single_source_bellman_ford_path_length",
+]
+
+
+def all_pairs_bellman_ford_path_length(G, weight="weight", *, chunksize="auto"):
+    # Larger chunksize offers more parallelism, but uses more memory.
+    # Chunksize indicates how many source nodes to compute at one time.
+    # The default is to choose the number of rows so the result, if dense,
+    # will be about 10 MB.
+    G = to_graph(G, weight=weight)
+    if chunksize == "auto":
+        # TODO: make a utility function for this that can be reused
+        targetsize = 10 * 1024 * 1024  # 10 MB
+        chunksize = max(1, targetsize // (len(G) * G._A.dtype.np_type.itemsize))
+
+    if chunksize is None or chunksize <= 0 or chunksize >= len(G):
+        # All at once
+        try:
+            D = algorithms.bellman_ford_path_lengths(G)
+        except algorithms.exceptions.Unbounded as e:
+            raise NetworkXUnbounded(*e.args) from e
+        yield from G.matrix_to_nodenodemap(D).items()
+    elif chunksize < 2:
+        for source in G:
+            try:
+                d = algorithms.single_source_bellman_ford_path_length(G, source)
+            except algorithms.exceptions.Unbounded as e:
+                raise NetworkXUnbounded(*e.args) from e
+            yield (source, G.vector_to_nodemap(d))
+    else:
+        # We should probably make a utility function for chunking
+        nodes = list(G)
+        for start, stop in zip(
+            range(0, len(nodes), chunksize), range(chunksize, len(nodes) + chunksize, chunksize)
+        ):
+            cur_nodes = nodes[start:stop]
+            try:
+                D = algorithms.bellman_ford_path_lengths(G, cur_nodes)
+            except algorithms.exceptions.Unbounded as e:
+                raise NetworkXUnbounded(*e.args) from e
+            for i, source in enumerate(cur_nodes):
+                d = D[i, :].new(name=f"all_pairs_bellman_ford_path_length_{i}")
+                yield (source, G.vector_to_nodemap(d))
+
+
+def single_source_bellman_ford_path_length(G, source, weight="weight"):
+    # TODO: what if weight is a function?
+    G = to_graph(G, weight=weight)
+    try:
+        d = algorithms.single_source_bellman_ford_path_length(G, source)
+    except algorithms.exceptions.Unbounded as e:
+        raise NetworkXUnbounded(*e.args) from e
+    except KeyError as e:
+        raise NodeNotFound(*e.args) from e
+    return G.vector_to_nodemap(d)
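For orientation, this is roughly how the new nxapi entry points are meant to be called — a sketch under the assumption that the returned NodeMap supports the Mapping protocol; the toy graph is hypothetical:

```python
import networkx as nx
from graphblas_algorithms import nxapi

G = nx.DiGraph()
G.add_weighted_edges_from([("a", "b", 2.0), ("b", "c", -1.0), ("a", "c", 5.0)])

# Single source: returns a dict-like NodeMap of path lengths.
d = nxapi.shortest_paths.weighted.single_source_bellman_ford_path_length(G, "a")
print(dict(d))  # expected: {'a': 0.0, 'b': 2.0, 'c': 1.0}

# All pairs: a generator of (source, NodeMap) pairs, computed chunkwise.
for source, lengths in nxapi.shortest_paths.weighted.all_pairs_bellman_ford_path_length(G):
    print(source, dict(lengths))
```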
diff --git a/scripts/bench.py b/scripts/bench.py
index e67640b..0feca1a 100755
--- a/scripts/bench.py
+++ b/scripts/bench.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 import argparse
+import gc
 import json
 import os
 import statistics
@@ -117,6 +118,7 @@ def stime(time):
 }
 # Is square_clustering undirected only? graphblas-algorithms doesn't implement it for directed
 undirected_only = {"generalized_degree", "k_truss", "triangles", "square_clustering"}
+returns_iterators = {"all_pairs_bellman_ford_path_length", "isolates"}
 
 
 def getfunction(functionname, backend):
@@ -145,19 +147,28 @@ def getgraph(dataname, backend="graphblas", functionname=None):
     return readfile(filename, is_symmetric, backend)
 
 
-def main(dataname, backend, functionname, time=3.0, n=None, extra=None, display=True):
+def main(
+    dataname, backend, functionname, time=3.0, n=None, extra=None, display=True, enable_gc=False
+):
     G = getgraph(dataname, backend, functionname)
     func = getfunction(functionname, backend)
     benchstring = functioncall.get(functionname, "func(G)")
     if extra is not None:
         benchstring = f"{benchstring[:-1]}, {extra})"
+    if functionname in returns_iterators:
+        benchstring = f"for _ in {benchstring}: pass"
     globals = {"func": func, "G": G}
     if functionname in poweriteration:
         benchstring = f"try:\n    {benchstring}\nexcept exc:\n    pass"
         globals["exc"] = nx.PowerIterationFailedConvergence
     if backend == "graphblas":
         benchstring = f"G._cache.clear()\n{benchstring}"
-    timer = timeit.Timer(benchstring, globals=globals)
+    if enable_gc:
+        setup = "gc.enable()"
+        globals["gc"] = gc
+    else:
+        setup = "pass"
+    timer = timeit.Timer(benchstring, setup=setup, globals=globals)
     if display:
         line = f"Backend = {backend}, function = {functionname}, data = {dataname}"
         if extra is not None:
@@ -234,6 +245,11 @@ def main(dataname, backend, functionname, time=3.0, n=None, extra=None, display=
         action="store_true",
         help="Print results as json instead of human-readable text",
     )
+    parser.add_argument(
+        "--gc",
+        action="store_true",
+        help="Enable the garbage collector during timing (may help if running out of memory)",
+    )
     parser.add_argument("-f", "--func", required=True, help="Which function to benchmark")
     parser.add_argument("--extra", help="Extra string to add to the function call")
     args = parser.parse_args()
@@ -245,6 +261,7 @@ def main(dataname, backend, functionname, time=3.0, n=None, extra=None, display=
         n=args.n,
         extra=args.extra,
         display=not args.json,
+        enable_gc=args.gc,
     )
     if args.json:
         print(json.dumps(info))
diff --git a/scripts/scipy_impl.py b/scripts/scipy_impl.py
index adf7ee3..2970be5 100644
--- a/scripts/scipy_impl.py
+++ b/scripts/scipy_impl.py
@@ -52,3 +52,14 @@ def pagerank(
         return x
     # return dict(zip(nodelist, map(float, x)))
     raise nx.PowerIterationFailedConvergence(max_iter)
+
+
+def all_pairs_bellman_ford_path_length(A, weight="weight"):
+    for source in range(A.shape[0]):
+        d = single_source_bellman_ford_path_length(A, source)
+        yield (source, d)
+
+
+def single_source_bellman_ford_path_length(A, source, weight="weight"):
+    return scipy.sparse.csgraph.bellman_ford(A, indices=source)  # So slow!
+    # return scipy.sparse.csgraph.dijkstra(A, indices=source)  # Faster
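Both "auto" heuristics added in this patch — `nsplits` in nxapi/cluster.py and `chunksize` in nxapi/shortest_paths/weighted.py — are the same idea: bound the size of a dense intermediate or dense output. Worked through with hypothetical numbers:

```python
# Hypothetical sizes: n = 100_000 nodes, 8-byte (FP64) values.
n = 100_000
itemsize = 8

# all_pairs_bellman_ford_path_length: enough rows per chunk that a dense
# chunksize-by-n block is about 10 MB.
targetsize = 10 * 1024 * 1024
chunksize = max(1, targetsize // (n * itemsize))  # -> 13 source nodes per chunk

# square_clustering: enough splits that a dense (n / nsplits)-by-n
# intermediate (`Asubset @ A`) is about 256 MB.
targetsize = 256 * 1024 * 1024
nsplits = n**2 * itemsize // targetsize  # -> 298 splits
```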