Skip to content

Commit

Permalink
Implement google_matrix and binary operators (#62)
Browse files Browse the repository at this point in the history
* Implement `google_matrix` and binary operators

* Also implement `floyd_warshall_numpy`

* Remove `floyd_warshall_numpy` from "core" (still in "nxapi")
  • Loading branch information
eriknw authored May 3, 2023
1 parent 17f19ba commit 80ba68b
Show file tree
Hide file tree
Showing 13 changed files with 401 additions and 7 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,17 @@ dispatch pattern shown above.
- isolates
- number_of_isolates
- Link Analysis
- google_matrix
- hits
- pagerank
- Operators
- compose
- difference
- disjoint_union
- full_join
- intersection
- symmetric_difference
- union
- Reciprocity
- overall_reciprocity
- reciprocity
Expand All @@ -168,6 +177,7 @@ dispatch pattern shown above.
- all_pairs_bellman_ford_path_length
- all_pairs_shortest_path_length
- floyd_warshall
- floyd_warshall_numpy
- floyd_warshall_predecessor_and_distance
- has_path
- negative_edge_cycle
Expand Down
1 change: 1 addition & 0 deletions graphblas_algorithms/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .dominating import *
from .isolate import *
from .link_analysis import *
from .operators import *
from .reciprocity import *
from .regular import *
from .shortest_paths import *
Expand Down
67 changes: 65 additions & 2 deletions graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from graphblas import Vector
import numpy as np
from graphblas import Matrix, Vector, binary, monoid
from graphblas.semiring import plus_first, plus_times

from graphblas_algorithms import Graph
from graphblas_algorithms.algorithms._helpers import is_converged
from graphblas_algorithms.algorithms.exceptions import ConvergenceFailure

__all__ = ["pagerank"]
__all__ = ["pagerank", "google_matrix"]


def pagerank(
Expand Down Expand Up @@ -98,3 +99,65 @@ def pagerank(
x.name = name
return x
raise ConvergenceFailure(max_iter)


def google_matrix(
G: Graph,
alpha=0.85,
personalization=None,
nodelist=None,
dangling=None,
name="google_matrix",
) -> Matrix:
A = G._A
ids = G.list_to_ids(nodelist)
if ids is not None:
ids = np.array(ids, np.uint64)
A = A[ids, ids].new(float, name=name)
else:
A = A.dup(float, name=name)
N = A.nrows
if N == 0:
return A

# Personalization vector or scalar
if personalization is None:
p = 1.0 / N
else:
if ids is not None:
personalization = personalization[ids].new(name="personalization")
denom = personalization.reduce().get(0)
if denom == 0:
raise ZeroDivisionError("personalization sums to 0")
p = (personalization / denom).new(mask=personalization.V, name="p")

if ids is None or len(ids) == len(G):
nonempty_rows = G.get_property("any_rowwise+") # XXX: What about self-edges?
else:
nonempty_rows = A.reduce_rowwise(monoid.any).new(name="nonempty_rows")

is_dangling = nonempty_rows.nvals < N
if is_dangling:
empty_rows = (~nonempty_rows.S).new(name="empty_rows")
if dangling is not None:
if ids is not None:
dangling = dangling[ids].new(name="dangling")
dangling_weights = (1.0 / dangling.reduce().get(0) * dangling).new(
mask=dangling.V, name="dangling_weights"
)
A << binary.first(empty_rows.outer(dangling_weights) | A)
elif personalization is None:
A << binary.first((p * empty_rows) | A)
else:
A << binary.first(empty_rows.outer(p) | A)

scale = A.reduce_rowwise(monoid.plus).new(float)
scale << alpha / scale
A << scale * A
p *= 1 - alpha
if personalization is None:
# Add a scalar everywhere, which makes A dense
A(binary.plus)[:, :] = p
else:
A << A + p
return A
1 change: 1 addition & 0 deletions graphblas_algorithms/algorithms/operators/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .binary import *
156 changes: 156 additions & 0 deletions graphblas_algorithms/algorithms/operators/binary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import numpy as np
from graphblas import Matrix, binary, dtypes, unary

from ..exceptions import GraphBlasAlgorithmException

__all__ = [
"compose",
"difference",
"disjoint_union",
"full_join",
"intersection",
"symmetric_difference",
"union",
]


def union(G, H, rename=(), *, name="union"):
if G.is_multigraph() != H.is_multigraph():
raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
if G.is_multigraph():
raise NotImplementedError("Not yet implemented for multigraphs")
if rename:
prefix = rename[0]
if prefix is not None:
G = type(G)(
G._A, key_to_id={f"{prefix}{key}": val for key, val in G._key_to_id.items()}
)
if len(rename) > 1:
prefix = rename[1]
if prefix is not None:
H = type(H)(
H._A, key_to_id={f"{prefix}{key}": val for key, val in H._key_to_id.items()}
)
A = G._A
B = H._A
if not G._key_to_id.keys().isdisjoint(H._key_to_id.keys()):
raise GraphBlasAlgorithmException("The node sets of the graphs are not disjoint.")
C = Matrix(dtypes.unify(A.dtype, B.dtype), A.nrows + B.nrows, A.ncols + B.ncols, name=name)
C[: A.nrows, : A.ncols] = A
C[A.nrows :, A.ncols :] = B
offset = A.nrows
key_to_id = {key: val + offset for key, val in H._key_to_id.items()}
key_to_id.update(G._key_to_id)
return type(G)(C, key_to_id=key_to_id)


def disjoint_union(G, H, *, name="disjoint_union"):
if G.is_multigraph() != H.is_multigraph():
raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
if G.is_multigraph():
raise NotImplementedError("Not yet implemented for multigraphs")
A = G._A
B = H._A
C = Matrix(dtypes.unify(A.dtype, B.dtype), A.nrows + B.nrows, A.ncols + B.ncols, name=name)
C[: A.nrows, : A.ncols] = A
C[A.nrows :, A.ncols :] = B
return type(G)(C)


def intersection(G, H, *, name="intersection"):
if G.is_multigraph() != H.is_multigraph():
raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
if G.is_multigraph():
raise NotImplementedError("Not yet implemented for multigraphs")
keys = sorted(G._key_to_id.keys() & H._key_to_id.keys(), key=G._key_to_id.__getitem__)
ids = np.array(G.list_to_ids(keys), np.uint64)
A = G._A[ids, ids].new()
ids = np.array(H.list_to_ids(keys), np.uint64)
B = H._A[ids, ids].new(dtypes.unify(A.dtype, H._A.dtype), mask=A.S, name=name)
B << unary.one(B)
return type(G)(B, key_to_id=dict(zip(keys, range(len(keys)))))


def difference(G, H, *, name="difference"):
if G.is_multigraph() != H.is_multigraph():
raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
if G.is_multigraph():
raise NotImplementedError("Not yet implemented for multigraphs")
if G._key_to_id.keys() != H._key_to_id.keys():
raise GraphBlasAlgorithmException("Node sets of graphs not equal")
A = G._A
if G._key_to_id == H._key_to_id:
B = H._A
else:
# Need to perform a permutation
keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__)
ids = np.array(H.list_to_ids(keys), np.uint64)
B = H._A[ids, ids].new()
C = unary.one(A).new(mask=~B.S, name=name)
return type(G)(C, key_to_id=G._key_to_id)


def symmetric_difference(G, H, *, name="symmetric_difference"):
if G.is_multigraph() != H.is_multigraph():
raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
if G.is_multigraph():
raise NotImplementedError("Not yet implemented for multigraphs")
if G._key_to_id.keys() != H._key_to_id.keys():
raise GraphBlasAlgorithmException("Node sets of graphs not equal")
A = G._A
if G._key_to_id == H._key_to_id:
B = H._A
else:
# Need to perform a permutation
keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__)
ids = np.array(H.list_to_ids(keys), np.uint64)
B = H._A[ids, ids].new()
Mask = binary.pair[bool](A & B).new(name="mask")
C = binary.pair(A | B, left_default=True, right_default=True).new(mask=~Mask.S, name=name)
return type(G)(C, key_to_id=G._key_to_id)


def compose(G, H, *, name="compose"):
if G.is_multigraph() != H.is_multigraph():
raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
if G.is_multigraph():
raise NotImplementedError("Not yet implemented for multigraphs")
A = G._A
B = H._A
if G._key_to_id.keys() == H._key_to_id.keys():
if G._key_to_id != H._key_to_id:
# Need to perform a permutation
keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__)
ids = np.array(H.list_to_ids(keys), np.uint64)
B = B[ids, ids].new()
C = binary.second(A | B).new(name=name)
key_to_id = G._key_to_id
else:
keys = sorted(G._key_to_id.keys() & H._key_to_id.keys(), key=G._key_to_id.__getitem__)
B = H._A
C = Matrix(
dtypes.unify(A.dtype, B.dtype),
A.nrows + B.nrows - len(keys),
A.ncols + B.ncols - len(keys),
name=name,
)
C[: A.nrows, : A.ncols] = A
ids1 = np.array(G.list_to_ids(keys), np.uint64)
ids2 = np.array(H.list_to_ids(keys), np.uint64)
C[ids1, ids1] = B[ids2, ids2]
newkeys = sorted(H._key_to_id.keys() - G._key_to_id.keys(), key=H._key_to_id.__getitem__)
ids = np.array(H.list_to_ids(newkeys), np.uint64)
C[A.nrows :, A.ncols :] = B[ids, ids]
# Now make new `key_to_id`
ids += A.nrows
key_to_id = dict(zip(newkeys, ids.tolist()))
key_to_id.update(G._key_to_id)
return type(G)(C, key_to_id=key_to_id)


def full_join(G, H, rename=(), *, name="full_join"):
rv = union(G, H, rename, name=name)
nrows, ncols = G._A.shape
rv._A[:nrows, ncols:] = True
rv._A[nrows:, :ncols] = True
return rv
13 changes: 12 additions & 1 deletion graphblas_algorithms/algorithms/shortest_paths/dense.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
from graphblas import Matrix, Vector, binary, indexunary, replace, select
from graphblas.semiring import any_plus, any_second

from ..exceptions import GraphBlasAlgorithmException

__all__ = ["floyd_warshall", "floyd_warshall_predecessor_and_distance"]


def floyd_warshall(G, is_weighted=False):
return floyd_warshall_predecessor_and_distance(G, is_weighted, compute_predecessors=False)[1]


def floyd_warshall_predecessor_and_distance(G, is_weighted=False, *, compute_predecessors=True):
def floyd_warshall_predecessor_and_distance(
G, is_weighted=False, *, compute_predecessors=True, permutation=None
):
# By using `offdiag` instead of `G._A`, we ensure that D will not become dense.
# Dense D may be better at times, but not including the diagonal will result in less work.
# Typically, Floyd-Warshall algorithms sets the diagonal of D to 0 at the beginning.
Expand All @@ -19,6 +23,13 @@ def floyd_warshall_predecessor_and_distance(G, is_weighted=False, *, compute_pre
nonempty_nodes = binary.pair(row_degrees & column_degrees).new(name="nonempty_nodes")
else:
A, nonempty_nodes = G.get_properties("U- degrees-")
if permutation is not None:
if len(permutation) != nonempty_nodes.size:
raise GraphBlasAlgorithmException(
"permutation must contain every node in G with no repeats."
)
A = A[permutation, permutation].new()
nonempty_nodes = nonempty_nodes[permutation].new(name="nonempty_nodes")

if A.dtype == bool or not is_weighted:
dtype = int
Expand Down
36 changes: 34 additions & 2 deletions graphblas_algorithms/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,16 @@ class Dispatcher:
number_of_isolates = nxapi.isolate.number_of_isolates
# Link Analysis
hits = nxapi.link_analysis.hits_alg.hits
google_matrix = nxapi.link_analysis.pagerank_alg.google_matrix
pagerank = nxapi.link_analysis.pagerank_alg.pagerank
# Operators
compose = nxapi.operators.binary.compose
difference = nxapi.operators.binary.difference
disjoint_union = nxapi.operators.binary.disjoint_union
full_join = nxapi.operators.binary.full_join
intersection = nxapi.operators.binary.intersection
symmetric_difference = nxapi.operators.binary.symmetric_difference
union = nxapi.operators.binary.union
# Reciprocity
overall_reciprocity = nxapi.overall_reciprocity
reciprocity = nxapi.reciprocity
Expand All @@ -60,6 +69,7 @@ class Dispatcher:
is_regular = nxapi.regular.is_regular
# Shortest Paths
floyd_warshall = nxapi.shortest_paths.dense.floyd_warshall
floyd_warshall_numpy = nxapi.shortest_paths.dense.floyd_warshall_numpy
floyd_warshall_predecessor_and_distance = (
nxapi.shortest_paths.dense.floyd_warshall_predecessor_and_distance
)
Expand Down Expand Up @@ -112,10 +122,14 @@ def convert_from_nx(graph, weight=None, *, name=None):

@staticmethod
def convert_to_nx(obj, *, name=None):
from graphblas import Matrix

from .classes import Graph

if isinstance(obj, Graph):
obj = obj.to_networkx()
elif isinstance(obj, Matrix):
obj = obj.to_dense(fill_value=False)
return obj

@staticmethod
Expand All @@ -127,8 +141,11 @@ def on_start_tests(items):

def key(testpath):
filename, path = testpath.split(":")
classname, testname = path.split(".")
return (testname, frozenset({classname, filename}))
*names, testname = path.split(".")
if names:
[classname] = names
return (testname, frozenset({classname, filename}))
return (testname, frozenset({filename}))

# Reasons to skip tests
multi_attributed = "unable to handle multi-attributed graphs"
Expand All @@ -140,7 +157,22 @@ def key(testpath):
key("test_mst.py:TestBoruvka.test_attributes"): multi_attributed,
key("test_mst.py:TestBoruvka.test_weight_attribute"): multi_attributed,
key("test_dense.py:TestFloyd.test_zero_weight"): multidigraph,
key("test_dense_numpy.py:test_zero_weight"): multidigraph,
key("test_weighted.py:TestBellmanFordAndGoldbergRadzik.test_multigraph"): multigraph,
key("test_binary.py:test_compose_multigraph"): multigraph,
key("test_binary.py:test_difference_multigraph_attributes"): multigraph,
key("test_binary.py:test_disjoint_union_multigraph"): multigraph,
key("test_binary.py:test_full_join_multigraph"): multigraph,
key("test_binary.py:test_intersection_multigraph_attributes"): multigraph,
key(
"test_binary.py:test_intersection_multigraph_attributes_node_set_different"
): multigraph,
key("test_binary.py:test_symmetric_difference_multigraph"): multigraph,
key("test_binary.py:test_union_attributes"): multi_attributed,
# TODO: move failing assertion from `test_union_and_compose`
key("test_binary.py:test_union_and_compose"): multi_attributed,
key("test_binary.py:test_union_multigraph"): multigraph,
key("test_vf2pp.py:test_custom_multigraph4_different_labels"): multigraph,
}
for item in items:
kset = set(item.keywords)
Expand Down
Loading

0 comments on commit 80ba68b

Please sign in to comment.