Skip to content

Commit

Permalink
Add {all_pairs,single_source}_bellman_ford_path_length (#44)
Browse files Browse the repository at this point in the history
* Add `{all_pairs,single_source}_bellman_ford_path_length`

That is, add these to `nxapi`:
- `all_pairs_bellman_ford_path_length`
- `single_source_bellman_ford_path_length`

* Implement `algorithms.bellman_ford_path_lengths` to compute in chunks

* Ignore diagonals during Bellman-Ford

* Add comment, and use `offdiag` more places.

* Do level BFS for Bellman-Ford when iso-valued (and non-negative)

* Use `"iso_value"` property more places instead of `A.ss.iso_value`

* Fail fast in these unlikely, but easily detected, cases

* Allow garbage collector to be enabled during benchmarks

* Automatically choose appropriate chunksize

* Use `nsplits="auto"` in square_clustering (default to 256 MB chunks)
  • Loading branch information
eriknw authored Feb 17, 2023
1 parent 0b649b2 commit 1180e1c
Show file tree
Hide file tree
Showing 21 changed files with 533 additions and 81 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ repos:
# These versions need updated manually
- flake8==6.0.0
- flake8-comprehensions==3.10.1
- flake8-bugbear==23.1.20
- flake8-bugbear==23.2.13
- flake8-simplify==0.19.3
- repo: https://github.com/asottile/yesqa
rev: v1.4.0
Expand Down
4 changes: 2 additions & 2 deletions graphblas_algorithms/algorithms/centrality/katz.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ def katz_centrality(
raise GraphBlasAlgorithmException("beta must have a value for every node")

A = G._A
if A.ss.is_iso:
if (iso_value := G.get_property("iso_value")) is not None:
# Fold iso-value into alpha
alpha *= A.ss.iso_value.get(1.0)
alpha *= iso_value.get(1.0)
semiring = plus_first[float]
else:
semiring = plus_times[float]
Expand Down
2 changes: 1 addition & 1 deletion graphblas_algorithms/algorithms/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def k_truss(G: Graph, k) -> Graph:
S = C

# Remove isolate nodes
indices, _ = C.reduce_rowwise(monoid.any).to_coo()
indices, _ = C.reduce_rowwise(monoid.any).to_coo(values=False)
Ktruss = C[indices, indices].new()

# Convert back to networkx graph with correct node ids
Expand Down
4 changes: 2 additions & 2 deletions graphblas_algorithms/algorithms/dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def descendants(G, source):
if source not in G._key_to_id:
raise KeyError(f"The node {source} is not in the graph")
index = G._key_to_id[source]
A = G._A
A = G.get_property("offdiag")
q = Vector.from_coo(index, True, size=A.nrows, name="q")
rv = q.dup(name="descendants")
for _ in range(A.nrows):
Expand All @@ -25,7 +25,7 @@ def ancestors(G, source):
if source not in G._key_to_id:
raise KeyError(f"The node {source} is not in the graph")
index = G._key_to_id[source]
A = G._A
A = G.get_property("offdiag")
q = Vector.from_coo(index, True, size=A.nrows, name="q")
rv = q.dup(name="descendants")
for _ in range(A.nrows):
Expand Down
4 changes: 4 additions & 0 deletions graphblas_algorithms/algorithms/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ class EmptyGraphError(GraphBlasAlgorithmException):

class PointlessConcept(GraphBlasAlgorithmException):
pass


class Unbounded(GraphBlasAlgorithmException):
pass
5 changes: 2 additions & 3 deletions graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,10 @@ def pagerank(
row_degrees = G.get_property("plus_rowwise+") # XXX: What about self-edges?
S = (alpha / row_degrees).new(name="S")

if A.ss.is_iso:
if (iso_value := G.get_property("iso_value")) is not None:
# Fold iso-value of A into S
# This lets us use the plus_first semiring, which is faster
iso_value = A.ss.iso_value
if iso_value != 1:
if iso_value.get(1) != 1:
S *= iso_value
semiring = plus_first[float]
else:
Expand Down
1 change: 1 addition & 0 deletions graphblas_algorithms/algorithms/shortest_paths/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .dense import *
from .generic import *
from .weighted import *
2 changes: 1 addition & 1 deletion graphblas_algorithms/algorithms/shortest_paths/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def has_path(G, source, target):
dst = G._key_to_id[target]
if src == dst:
return True
A = G._A
A = G.get_property("offdiag")
q_src = Vector.from_coo(src, True, size=A.nrows, name="q_src")
seen_src = q_src.dup(name="seen_src")
q_dst = Vector.from_coo(dst, True, size=A.nrows, name="q_dst")
Expand Down
207 changes: 207 additions & 0 deletions graphblas_algorithms/algorithms/shortest_paths/weighted.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
import numpy as np
from graphblas import Matrix, Vector, binary, monoid, replace, select, unary
from graphblas.semiring import any_pair, min_plus

from ..exceptions import Unbounded

__all__ = [
"single_source_bellman_ford_path_length",
"bellman_ford_path_lengths",
]


def single_source_bellman_ford_path_length(G, source):
# No need for `is_weighted=` keyword, b/c this is assumed to be weighted (I think)
index = G._key_to_id[source]
if G.get_property("is_iso"):
# If the edges are iso-valued (and positive), then we can simply do level BFS
is_negative, iso_value = G.get_properties("has_negative_edges+ iso_value")
if not is_negative:
d = _bfs_level(G, source, dtype=iso_value.dtype)
if iso_value != 1:
d *= iso_value
return d
# It's difficult to detect negative cycles with BFS
if G._A[index, index].get() is not None:
raise Unbounded("Negative cycle detected.")
if not G.is_directed() and G._A[index, :].nvals > 0:
# For undirected graphs, any negative edge is a cycle
raise Unbounded("Negative cycle detected.")

# Use `offdiag` instead of `A`, b/c self-loops don't contribute to the result,
# and negative self-loops are easy negative cycles to avoid.
# We check if we hit a self-loop negative cycle at the end.
A, has_negative_diagonal = G.get_properties("offdiag has_negative_diagonal")
if A.dtype == bool:
# Should we upcast e.g. INT8 to INT64 as well?
dtype = int
else:
dtype = A.dtype
n = A.nrows
d = Vector(dtype, n, name="single_source_bellman_ford_path_length")
d[index] = 0
cur = d.dup(name="cur")
mask = Vector(bool, n, name="mask")
one = unary.one[bool]
for _i in range(n - 1):
# This is a slightly modified Bellman-Ford algorithm.
# `cur` is the current frontier of values that improved in the previous iteration.
# This means that in this iteration we drop values from `cur` that are not better.
cur << min_plus(cur @ A)

# Mask is True where cur not in d or cur < d
mask << one(cur)
mask(binary.second) << binary.lt(cur & d)

# Drop values from `cur` that didn't improve
cur(mask.V, replace) << cur
if cur.nvals == 0:
break
# Update `d` with values that improved
d(cur.S) << cur
else:
# Check for negative cycle when for loop completes without breaking
cur << min_plus(cur @ A)
mask << binary.lt(cur & d)
if mask.reduce(monoid.lor):
raise Unbounded("Negative cycle detected.")
if has_negative_diagonal:
# We removed diagonal entries above, so check if we visited one with a negative weight
diag = G.get_property("diag")
cur << select.valuelt(diag, 0)
if any_pair(d @ cur):
raise Unbounded("Negative cycle detected.")
return d


def bellman_ford_path_lengths(G, nodes=None, *, expand_output=False):
"""
Parameters
----------
expand_output : bool, default False
When False, the returned Matrix has one row per node in nodes.
When True, the returned Matrix has the same shape as the input Matrix.
"""
# Same algorithms as in `single_source_bellman_ford_path_length`, but with
# `Cur` as a Matrix with each row corresponding to a source node.
if G.get_property("is_iso"):
is_negative, iso_value = G.get_properties("has_negative_edges+ iso_value")
if not is_negative:
D = _bfs_levels(G, nodes, dtype=iso_value.dtype)
if iso_value != 1:
D *= iso_value
if nodes is not None and expand_output and D.ncols != D.nrows:
ids = G.list_to_ids(nodes)
rv = Matrix(D.dtype, D.ncols, D.ncols, name=D.name)
rv[ids, :] = D
return rv
return D
if not G.is_directed():
# For undirected graphs, any negative edge is a cycle
if nodes is not None:
ids = G.list_to_ids(nodes)
if G._A[ids, :].nvals > 0:
raise Unbounded("Negative cycle detected.")
elif G._A.nvals > 0:
raise Unbounded("Negative cycle detected.")

A, has_negative_diagonal = G.get_properties("offdiag has_negative_diagonal")
if A.dtype == bool:
dtype = int
else:
dtype = A.dtype
n = A.nrows
if nodes is None:
# TODO: `D = Vector.from_scalar(0, n, dtype).diag()`
D = Vector(dtype, n, name="bellman_ford_path_lengths_vector")
D << 0
D = D.diag(name="bellman_ford_path_lengths")
else:
ids = G.list_to_ids(nodes)
D = Matrix.from_coo(
np.arange(len(ids), dtype=np.uint64),
ids,
0,
dtype,
nrows=len(ids),
ncols=n,
name="bellman_ford_path_lengths",
)
Cur = D.dup(name="Cur")
Mask = Matrix(bool, D.nrows, D.ncols, name="Mask")
one = unary.one[bool]
for _i in range(n - 1):
Cur << min_plus(Cur @ A)
Mask << one(Cur)
Mask(binary.second) << binary.lt(Cur & D)
Cur(Mask.V, replace) << Cur
if Cur.nvals == 0:
break
D(Cur.S) << Cur
else:
Cur << min_plus(Cur @ A)
Mask << binary.lt(Cur & D)
if Mask.reduce_scalar(monoid.lor):
raise Unbounded("Negative cycle detected.")
if has_negative_diagonal:
diag = G.get_property("diag")
cur = select.valuelt(diag, 0)
if any_pair(D @ cur).nvals > 0:
raise Unbounded("Negative cycle detected.")
if nodes is not None and expand_output and D.ncols != D.nrows:
rv = Matrix(D.dtype, n, n, name=D.name)
rv[ids, :] = D
return rv
return D


def _bfs_level(G, source, *, dtype=int):
if dtype == bool:
dtype = int
index = G._key_to_id[source]
A = G.get_property("offdiag")
n = A.nrows
v = Vector(dtype, n, name="bfs_level")
q = Vector(bool, n, name="q")
v[index] = 0
q[index] = True
any_pair_bool = any_pair[bool]
for i in range(1, n):
q(~v.S, replace) << any_pair_bool(q @ A)
if q.nvals == 0:
break
v(q.S) << i
return v


def _bfs_levels(G, nodes=None, *, dtype=int):
if dtype == bool:
dtype = int
A = G.get_property("offdiag")
n = A.nrows
if nodes is None:
# TODO: `D = Vector.from_scalar(0, n, dtype).diag()`
D = Vector(dtype, n, name="bfs_levels_vector")
D << 0
D = D.diag(name="bfs_levels")
else:
ids = G.list_to_ids(nodes)
D = Matrix.from_coo(
np.arange(len(ids), dtype=np.uint64),
ids,
0,
dtype,
nrows=len(ids),
ncols=n,
name="bfs_levels",
)
Q = Matrix(bool, D.nrows, D.ncols, name="Q")
Q << unary.one[bool](D)
any_pair_bool = any_pair[bool]
for i in range(1, n):
Q(~D.S, replace) << any_pair_bool(Q @ A)
if Q.nvals == 0:
break
D(Q.S) << i
return D
2 changes: 1 addition & 1 deletion graphblas_algorithms/algorithms/simple_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def is_simple_path(G, nodes):
return False
if len(nodes) == 1:
return nodes[0] in G
A = G._A
A = G._A # offdiag instead?
if A.nvals < len(nodes) - 1:
return False
key_to_id = G._key_to_id
Expand Down
20 changes: 18 additions & 2 deletions graphblas_algorithms/classes/_caching.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from graphblas import op
from graphblas import Scalar, dtypes, op
from graphblas.core import operator

NONNEGATIVE_DTYPES = {dtypes.BOOL, dtypes.UINT8, dtypes.UINT16, dtypes.UINT32, dtypes.UINT64}


def get_reduce_to_vector(key, opname, methodname):
op_ = op.from_string(opname)
Expand Down Expand Up @@ -142,7 +144,7 @@ def get_reduction(G, mask=None):
cache[f"{keybase}+"] = cache[key]
return cache[key]

else:
elif key[-1] == "+":

def get_reduction(G, mask=None):
A = G._A
Expand Down Expand Up @@ -170,4 +172,18 @@ def get_reduction(G, mask=None):
cache[f"{keybase}-"] = cache[key]
return cache[key]

elif key.endswith("_diagonal"):

def get_reduction(G, mask=None):
A = G._A
cache = G._cache
if key not in cache:
if not G.get_property("has_self_edges"):
cache[key] = Scalar(op_[A.dtype].return_type, name=key)
else:
cache[key] = G.get_property("diag").reduce(op_).new(name=key)
return cache[key]

else: # pragma: no cover (sanity)
raise RuntimeError()
return get_reduction
4 changes: 2 additions & 2 deletions graphblas_algorithms/classes/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,14 @@ def dict_to_vector(self, d, *, size=None, dtype=None, name=None):
return Vector.from_coo(indices, values, size=size, dtype=dtype, name=name)


def list_to_vector(self, nodes, dtype=bool, *, size=None, name=None):
def list_to_vector(self, nodes, dtype=None, *, values=True, size=None, name=None):
if nodes is None:
return None
if size is None:
size = len(self)
key_to_id = self._key_to_id
index = [key_to_id[key] for key in nodes]
return Vector.from_coo(index, True, size=size, dtype=dtype, name=name)
return Vector.from_coo(index, values, size=size, dtype=dtype, name=name)


def list_to_mask(self, nodes, *, size=None, name="mask"):
Expand Down
Loading

0 comments on commit 1180e1c

Please sign in to comment.