Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add {all_pairs,single_source}_bellman_ford_path_length #44

Merged
merged 13 commits into from
Feb 17, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions graphblas_algorithms/algorithms/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ class EmptyGraphError(GraphBlasAlgorithmException):

class PointlessConcept(GraphBlasAlgorithmException):
pass


class Unbounded(GraphBlasAlgorithmException):
    """Raised when a shortest-path computation detects a negative-weight cycle."""

    pass
1 change: 1 addition & 0 deletions graphblas_algorithms/algorithms/shortest_paths/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .dense import *
from .generic import *
from .weighted import *
100 changes: 100 additions & 0 deletions graphblas_algorithms/algorithms/shortest_paths/weighted.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import numpy as np
from graphblas import Matrix, Vector, binary, monoid, replace, unary
from graphblas.semiring import min_plus

from ..exceptions import Unbounded

__all__ = [
"single_source_bellman_ford_path_length",
"bellman_ford_path_lengths",
]


def single_source_bellman_ford_path_length(G, source):
    """Bellman-Ford shortest-path lengths from ``source`` to all reachable nodes.

    Parameters
    ----------
    G : Graph
        graphblas-algorithms graph wrapper; reads ``G._A`` (adjacency Matrix)
        and ``G._key_to_id`` (node key -> row index mapping).
    source : node key
        Starting node; looked up in ``G._key_to_id``.

    Returns
    -------
    Vector
        ``d[i]`` is the shortest-path length from ``source`` to node ``i``;
        unreachable nodes have no entry.

    Raises
    ------
    Unbounded
        If the final relaxation still improves a distance, i.e. a
        negative-weight cycle is reachable from ``source``.
    KeyError
        If ``source`` is not a node of ``G``.
    """
    # No need for `is_weighted=` keyword, b/c this is assumed to be weighted (I think)
    index = G._key_to_id[source]
    A = G._A
    if A.dtype == bool:
        # Boolean adjacency means unweighted edges; accumulate distances as ints.
        # Should we upcast e.g. INT8 to INT64 as well?
        dtype = int
    else:
        dtype = A.dtype
    n = A.nrows
    d = Vector(dtype, n, name="single_source_bellman_ford_path_length")
    d[index] = 0  # the source is at distance zero from itself
    cur = d.dup(name="cur")
    mask = Vector(bool, n, name="mask")
    one = unary.one[bool]
    # Classic Bellman-Ford converges after at most n - 1 relaxation rounds.
    for _i in range(n - 1):
        # This is a slightly modified Bellman-Ford algorithm.
        # `cur` is the current frontier of values that improved in the previous iteration.
        # This means that in this iteration we drop values from `cur` that are not better.
        cur << min_plus(cur @ A)

        # Mask is True where cur not in d or cur < d
        mask << one(cur)
        mask(binary.second) << binary.lt(cur & d)

        # Drop values from `cur` that didn't improve
        cur(mask.V, replace) << cur
        if cur.nvals == 0:
            # Empty frontier: nothing improved this round, so we converged early.
            break
        # Update `d` with values that improved
        d(cur.S) << cur
    else:
        # Check for negative cycle when for loop completes without breaking
        cur << min_plus(cur @ A)
        mask << binary.lt(cur & d)
        if mask.reduce(monoid.lor):
            raise Unbounded("Negative cycle detected.")
    return d


def bellman_ford_path_lengths(G, nodes=None, *, expand_output=False):
    """Bellman-Ford shortest-path lengths from several source nodes at once.

    Parameters
    ----------
    G : Graph
        graphblas-algorithms graph wrapper; reads ``G._A`` (adjacency Matrix).
    nodes : iterable of node keys, optional
        Source nodes; when None, every node is used as a source.
    expand_output : bool, default False
        When False, the returned Matrix has one row per node in nodes.
        When True, the returned Matrix has the same shape as the input Matrix.

    Returns
    -------
    Matrix
        Row ``i`` holds the path lengths from the ``i``-th source node
        (or from node ``i`` itself when ``nodes`` is None or
        ``expand_output`` is True).

    Raises
    ------
    Unbounded
        If a negative-weight cycle is reachable from any source.
    """
    # Same algorithm as in `single_source_bellman_ford_path_length`, but with
    # `Cur` as a Matrix with each row corresponding to a source node.
    A = G._A
    if A.dtype == bool:
        # Boolean adjacency means unweighted edges; accumulate distances as ints.
        dtype = int
    else:
        dtype = A.dtype
    n = A.nrows
    if nodes is None:
        # All sources: start from the zero diagonal (each node at distance 0 from itself).
        # TODO: `D = Vector.from_iso_value(0, n, dtype).diag()`
        D = Vector(dtype, n, name="bellman_ford_path_lengths_vector")
        D << 0
        D = D.diag(name="bellman_ford_path_lengths")
    else:
        # Row i gets a single 0 entry at column ids[i] (source i at distance 0).
        ids = G.list_to_ids(nodes)
        D = Matrix.from_coo(
            np.arange(len(ids), dtype=np.uint64), ids, 0, dtype, nrows=len(ids), ncols=n
        )
    Cur = D.dup(name="Cur")
    Mask = Matrix(bool, D.nrows, D.ncols, name="Mask")
    one = unary.one[bool]
    for _i in range(n - 1):
        # Relax all frontier rows simultaneously.
        Cur << min_plus(Cur @ A)
        # Mask is True where Cur not in D or Cur < D (same trick as the vector version).
        Mask << one(Cur)
        Mask(binary.second) << binary.lt(Cur & D)
        # Keep only entries that improved this round.
        Cur(Mask.V, replace) << Cur
        if Cur.nvals == 0:
            break
        D(Cur.S) << Cur
    else:
        # Loop ran to completion: one more relaxation detects negative cycles.
        Cur << min_plus(Cur @ A)
        Mask << binary.lt(Cur & D)
        if Mask.reduce_scalar(monoid.lor):
            raise Unbounded("Negative cycle detected.")
    if nodes is not None and expand_output:
        # Scatter the chunk's rows back to their original positions in an n-by-n result.
        rv = Matrix(D.dtype, n, n, name=D.name)
        rv[ids, :] = D
        return rv
    return D
4 changes: 2 additions & 2 deletions graphblas_algorithms/classes/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,14 @@ def dict_to_vector(self, d, *, size=None, dtype=None, name=None):
return Vector.from_coo(indices, values, size=size, dtype=dtype, name=name)


def list_to_vector(self, nodes, dtype=bool, *, size=None, name=None):
def list_to_vector(self, nodes, dtype=None, *, values=True, size=None, name=None):
    """Build a Vector with entries ``values`` at the positions of ``nodes``.

    Returns None when ``nodes`` is None.  ``size`` defaults to the number
    of nodes in the graph.
    """
    if nodes is None:
        return None
    if size is None:
        size = len(self)
    lookup = self._key_to_id
    positions = [lookup[node] for node in nodes]
    return Vector.from_coo(positions, values, size=size, dtype=dtype, name=name)


def list_to_mask(self, nodes, *, size=None, name="mask"):
Expand Down
24 changes: 20 additions & 4 deletions graphblas_algorithms/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ class Dispatcher:
nxapi.shortest_paths.dense.floyd_warshall_predecessor_and_distance
)
has_path = nxapi.shortest_paths.generic.has_path
all_pairs_bellman_ford_path_length = (
nxapi.shortest_paths.weighted.all_pairs_bellman_ford_path_length
)
single_source_bellman_ford_path_length = (
nxapi.shortest_paths.weighted.single_source_bellman_ford_path_length
)
# Simple Paths
is_simple_path = nxapi.simple_paths.is_simple_path
# S Metric
Expand Down Expand Up @@ -103,13 +109,23 @@ def on_start_tests(items):
import pytest
except ImportError: # pragma: no cover (import)
return

def key(testpath):
    """Turn 'file.py:Class.test' into the (test, {class, file}) skip-table key."""
    filename, dotted = testpath.split(":")
    classname, testname = dotted.split(".")
    return (testname, frozenset({classname, filename}))

# Reasons to skip tests
multi_attributed = "unable to handle multi-attributed graphs"
multidigraph = "unable to handle MultiDiGraph"
freeze = frozenset
multigraph = "unable to handle MultiGraph"

# Which tests to skip
skip = {
("test_attributes", freeze({"TestBoruvka", "test_mst.py"})): multi_attributed,
("test_weight_attribute", freeze({"TestBoruvka", "test_mst.py"})): multi_attributed,
("test_zero_weight", freeze({"TestFloyd", "test_dense.py"})): multidigraph,
key("test_mst.py:TestBoruvka.test_attributes"): multi_attributed,
key("test_mst.py:TestBoruvka.test_weight_attribute"): multi_attributed,
key("test_dense.py:TestFloyd.test_zero_weight"): multidigraph,
key("test_weighted.py:TestBellmanFordAndGoldbergRadzik.test_multigraph"): multigraph,
}
for item in items:
kset = set(item.keywords)
Expand Down
4 changes: 4 additions & 0 deletions graphblas_algorithms/nxapi/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ class NetworkXError(Exception):
class NetworkXPointlessConcept(Exception):
pass

class NetworkXUnbounded(Exception):
    """Raised when a path-length computation is unbounded (negative cycle).

    NOTE(review): appears to be a stand-in rebound by the
    ``from networkx import ...`` at the bottom of this module -- confirm.
    """

    pass

class NodeNotFound(Exception):
    """Raised when a requested node is not in the graph.

    NOTE(review): appears to be a stand-in rebound by the
    ``from networkx import ...`` at the bottom of this module -- confirm.
    """

    pass

Expand All @@ -18,6 +21,7 @@ class PowerIterationFailedConvergence(Exception):
from networkx import (
NetworkXError,
NetworkXPointlessConcept,
NetworkXUnbounded,
NodeNotFound,
PowerIterationFailedConvergence,
)
Expand Down
1 change: 1 addition & 0 deletions graphblas_algorithms/nxapi/shortest_paths/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .dense import *
from .generic import *
from .weighted import *
47 changes: 47 additions & 0 deletions graphblas_algorithms/nxapi/shortest_paths/weighted.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from graphblas_algorithms import algorithms
from graphblas_algorithms.classes.digraph import to_graph

from ..exception import NetworkXUnbounded, NodeNotFound

__all__ = [
"all_pairs_bellman_ford_path_length",
"single_source_bellman_ford_path_length",
]


def all_pairs_bellman_ford_path_length(G, weight="weight", *, chunksize=128):
    """Yield ``(source, lengths_nodemap)`` for every node in ``G``.

    ``chunksize`` trades memory for parallelism: larger chunks compute more
    sources per matrix call (more parallelism, more memory).  A value below
    2 falls back to one single-source computation per node.
    """
    G = to_graph(G, weight=weight)
    if chunksize < 2:
        # One source at a time: minimal memory footprint.
        for node in G:
            try:
                lengths = algorithms.single_source_bellman_ford_path_length(G, node)
            except algorithms.exceptions.Unbounded as e:
                raise NetworkXUnbounded(*e.args) from e
            yield (node, G.vector_to_nodemap(lengths))
    else:
        # We should probably make a utility function for chunking
        all_nodes = list(G)
        for begin in range(0, len(all_nodes), chunksize):
            chunk = all_nodes[begin : begin + chunksize]
            try:
                lengths_matrix = algorithms.bellman_ford_path_lengths(G, chunk)
            except algorithms.exceptions.Unbounded as e:
                raise NetworkXUnbounded(*e.args) from e
            for row, node in enumerate(chunk):
                row_vec = lengths_matrix[row, :].new(
                    name=f"all_pairs_bellman_ford_path_length_{row}"
                )
                yield (node, G.vector_to_nodemap(row_vec))


def single_source_bellman_ford_path_length(G, source, weight="weight"):
    """Return a node->distance nodemap of Bellman-Ford lengths from ``source``."""
    # TODO: what if weight is a function?
    G = to_graph(G, weight=weight)
    try:
        distances = algorithms.single_source_bellman_ford_path_length(G, source)
    except KeyError as e:
        # `source` is not a node of the graph.
        raise NodeNotFound(*e.args) from e
    except algorithms.exceptions.Unbounded as e:
        # A negative-weight cycle is reachable from `source`.
        raise NetworkXUnbounded(*e.args) from e
    return G.vector_to_nodemap(distances)
3 changes: 3 additions & 0 deletions scripts/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def stime(time):
}
# Is square_clustering undirected only? graphblas-algorithms doesn't implement it for directed
undirected_only = {"generalized_degree", "k_truss", "triangles", "square_clustering"}
returns_iterators = {"all_pairs_bellman_ford_path_length", "isolates"}


def getfunction(functionname, backend):
Expand Down Expand Up @@ -151,6 +152,8 @@ def main(dataname, backend, functionname, time=3.0, n=None, extra=None, display=
benchstring = functioncall.get(functionname, "func(G)")
if extra is not None:
benchstring = f"{benchstring[:-1]}, {extra})"
if functionname in returns_iterators:
benchstring = f"for _ in {benchstring}: pass"
globals = {"func": func, "G": G}
if functionname in poweriteration:
benchstring = f"try:\n {benchstring}\nexcept exc:\n pass"
Expand Down
11 changes: 11 additions & 0 deletions scripts/scipy_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,14 @@ def pagerank(
return x
# return dict(zip(nodelist, map(float, x)))
raise nx.PowerIterationFailedConvergence(max_iter)


def all_pairs_bellman_ford_path_length(A, weight="weight"):
    """Yield ``(source, distances)`` for every row index of sparse matrix ``A``."""
    n = A.shape[0]
    yield from (
        (src, single_source_bellman_ford_path_length(A, src)) for src in range(n)
    )


def single_source_bellman_ford_path_length(A, source, weight="weight"):
    """Distances from row ``source`` of sparse matrix ``A`` via SciPy Bellman-Ford."""
    distances = scipy.sparse.csgraph.bellman_ford(A, indices=source)  # So slow!
    # Faster alternative (no negative weights):
    #   scipy.sparse.csgraph.dijkstra(A, indices=source)
    return distances