Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement google_matrix and binary operators #62

Merged
merged 4 commits into from
May 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,17 @@ dispatch pattern shown above.
- isolates
- number_of_isolates
- Link Analysis
- google_matrix
- hits
- pagerank
- Operators
- compose
- difference
- disjoint_union
- full_join
- intersection
- symmetric_difference
- union
- Reciprocity
- overall_reciprocity
- reciprocity
Expand All @@ -168,6 +177,7 @@ dispatch pattern shown above.
- all_pairs_bellman_ford_path_length
- all_pairs_shortest_path_length
- floyd_warshall
- floyd_warshall_numpy
- floyd_warshall_predecessor_and_distance
- has_path
- negative_edge_cycle
Expand Down
1 change: 1 addition & 0 deletions graphblas_algorithms/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .dominating import *
from .isolate import *
from .link_analysis import *
from .operators import *
from .reciprocity import *
from .regular import *
from .shortest_paths import *
Expand Down
67 changes: 65 additions & 2 deletions graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from graphblas import Vector
import numpy as np
from graphblas import Matrix, Vector, binary, monoid
from graphblas.semiring import plus_first, plus_times

from graphblas_algorithms import Graph
from graphblas_algorithms.algorithms._helpers import is_converged
from graphblas_algorithms.algorithms.exceptions import ConvergenceFailure

__all__ = ["pagerank"]
__all__ = ["pagerank", "google_matrix"]


def pagerank(
Expand Down Expand Up @@ -98,3 +99,65 @@ def pagerank(
x.name = name
return x
raise ConvergenceFailure(max_iter)


def google_matrix(
    G: Graph,
    alpha=0.85,
    personalization=None,
    nodelist=None,
    dangling=None,
    name="google_matrix",
) -> Matrix:
    """Build the Google matrix of ``G`` as a float ``Matrix``.

    The adjacency matrix (optionally restricted/reordered by ``nodelist``) has
    its empty rows filled, is scaled so that every row sums to ``alpha``, and
    then has ``(1 - alpha) * p`` added, where ``p`` is the normalized
    personalization (or ``1 / N`` when no personalization is given).

    Parameters
    ----------
    G : Graph
        Graph whose adjacency matrix ``G._A`` is used.
    alpha : float
        Weight given to the row-normalized adjacency part; ``1 - alpha`` is
        the weight given to ``p``.
    personalization : optional
        Per-node weights; normalized by their sum before use.
        NOTE(review): presumably a Vector indexed by node id -- confirm.
    nodelist : optional
        Nodes to select, in output order; converted with ``G.list_to_ids``.
    dangling : optional
        Per-node weights used to fill rows that have no entries; normalized
        by their sum. NOTE(review): presumably a Vector indexed by node id.
    name : str
        Name given to the returned matrix.

    Returns
    -------
    Matrix
        The Google matrix; returned unchanged (empty) when there are no nodes.

    Raises
    ------
    ZeroDivisionError
        If ``personalization`` sums to 0.
    """
    A = G._A
    ids = G.list_to_ids(nodelist)
    if ids is not None:
        ids = np.array(ids, np.uint64)
        A = A[ids, ids].new(float, name=name)
    else:
        A = A.dup(float, name=name)
    N = A.nrows
    if N == 0:
        return A

    # Personalization vector or scalar
    if personalization is None:
        p = 1.0 / N
    else:
        if ids is not None:
            personalization = personalization[ids].new(name="personalization")
        denom = personalization.reduce().get(0)
        if denom == 0:
            raise ZeroDivisionError("personalization sums to 0")
        p = (personalization / denom).new(mask=personalization.V, name="p")

    # The cached row property is indexed in G's original node order, so it is
    # only valid when `A` was not reordered or subset.  A `nodelist` that is a
    # permutation of all nodes has the same length as G but a different row
    # order, so it must be recomputed from the extracted matrix.
    same_order = ids is None or (
        len(ids) == len(G) and np.array_equal(ids, np.arange(len(ids), dtype=np.uint64))
    )
    if same_order:
        nonempty_rows = G.get_property("any_rowwise+")  # XXX: What about self-edges?
    else:
        nonempty_rows = A.reduce_rowwise(monoid.any).new(name="nonempty_rows")

    is_dangling = nonempty_rows.nvals < N
    if is_dangling:
        empty_rows = (~nonempty_rows.S).new(name="empty_rows")
        if dangling is not None:
            if ids is not None:
                dangling = dangling[ids].new(name="dangling")
            dangling_weights = (1.0 / dangling.reduce().get(0) * dangling).new(
                mask=dangling.V, name="dangling_weights"
            )
            A << binary.first(empty_rows.outer(dangling_weights) | A)
        elif personalization is None:
            A << binary.first((p * empty_rows) | A)
        else:
            A << binary.first(empty_rows.outer(p) | A)

    # Scale each row so that it sums to `alpha`.
    scale = A.reduce_rowwise(monoid.plus).new(float)
    scale << alpha / scale
    A << scale * A
    p *= 1 - alpha
    if personalization is None:
        # Add a scalar everywhere, which makes A dense
        A(binary.plus)[:, :] = p
    else:
        A << A + p
    return A
1 change: 1 addition & 0 deletions graphblas_algorithms/algorithms/operators/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .binary import *
156 changes: 156 additions & 0 deletions graphblas_algorithms/algorithms/operators/binary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import numpy as np
from graphblas import Matrix, binary, dtypes, unary

from ..exceptions import GraphBlasAlgorithmException

# Public API of this module: the graph binary operators defined below.
__all__ = [
"compose",
"difference",
"disjoint_union",
"full_join",
"intersection",
"symmetric_difference",
"union",
]


def union(G, H, rename=(), *, name="union"):
    """Return the union of ``G`` and ``H``, whose node sets must be disjoint.

    ``rename`` may hold up to two prefixes (one for ``G``, one for ``H``); a
    prefix that is not ``None`` is prepended to every node key of that graph
    before the disjointness check.  The result places ``G``'s adjacency matrix
    in the top-left block and ``H``'s in the bottom-right block.

    Raises ``GraphBlasAlgorithmException`` when only one input is a multigraph
    or when the (renamed) node sets overlap, and ``NotImplementedError`` for
    multigraphs.
    """
    if G.is_multigraph() != H.is_multigraph():
        raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
    if G.is_multigraph():
        raise NotImplementedError("Not yet implemented for multigraphs")

    def with_prefix(graph, prefix):
        # Same adjacency matrix, node keys relabeled with the prefix.
        relabeled = {f"{prefix}{key}": val for key, val in graph._key_to_id.items()}
        return type(graph)(graph._A, key_to_id=relabeled)

    if rename:
        if rename[0] is not None:
            G = with_prefix(G, rename[0])
        if len(rename) > 1 and rename[1] is not None:
            H = with_prefix(H, rename[1])

    left = G._A
    right = H._A
    if not G._key_to_id.keys().isdisjoint(H._key_to_id.keys()):
        raise GraphBlasAlgorithmException("The node sets of the graphs are not disjoint.")

    out = Matrix(
        dtypes.unify(left.dtype, right.dtype),
        left.nrows + right.nrows,
        left.ncols + right.ncols,
        name=name,
    )
    out[: left.nrows, : left.ncols] = left
    out[left.nrows :, left.ncols :] = right

    # H's node ids are shifted past the rows occupied by G.
    shift = left.nrows
    shifted = {key: val + shift for key, val in H._key_to_id.items()}
    return type(G)(out, key_to_id={**shifted, **G._key_to_id})


def disjoint_union(G, H, *, name="disjoint_union"):
    """Return a graph holding ``G`` and ``H`` side by side as diagonal blocks.

    No node keys are carried over: the result is constructed from the combined
    adjacency matrix alone.  Raises ``GraphBlasAlgorithmException`` when only
    one input is a multigraph and ``NotImplementedError`` for multigraphs.
    """
    if G.is_multigraph() != H.is_multigraph():
        raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
    if G.is_multigraph():
        raise NotImplementedError("Not yet implemented for multigraphs")

    left, right = G._A, H._A
    n_rows = left.nrows + right.nrows
    n_cols = left.ncols + right.ncols
    out = Matrix(dtypes.unify(left.dtype, right.dtype), n_rows, n_cols, name=name)
    # G fills the top-left block, H the bottom-right block.
    out[: left.nrows, : left.ncols] = left
    out[left.nrows :, left.ncols :] = right
    return type(G)(out)


def intersection(G, H, *, name="intersection"):
    """Return the graph of edges present in both ``G`` and ``H``.

    Only nodes common to both graphs are kept, ordered by their id in ``G``
    and renumbered from 0.  Every surviving edge is given the value one.
    Raises ``GraphBlasAlgorithmException`` when only one input is a multigraph
    and ``NotImplementedError`` for multigraphs.
    """
    if G.is_multigraph() != H.is_multigraph():
        raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
    if G.is_multigraph():
        raise NotImplementedError("Not yet implemented for multigraphs")

    g_map = G._key_to_id
    shared = sorted(g_map.keys() & H._key_to_id.keys(), key=g_map.__getitem__)

    g_ids = np.array(G.list_to_ids(shared), np.uint64)
    left = G._A[g_ids, g_ids].new()

    h_ids = np.array(H.list_to_ids(shared), np.uint64)
    # Masking by G's structure keeps only the edges that exist in both graphs.
    out = H._A[h_ids, h_ids].new(dtypes.unify(left.dtype, H._A.dtype), mask=left.S, name=name)
    out << unary.one(out)

    new_ids = {key: position for position, key in enumerate(shared)}
    return type(G)(out, key_to_id=new_ids)


def difference(G, H, *, name="difference"):
    """Return the graph of edges that are in ``G`` but not in ``H``.

    Both graphs must have the same node set.  The result keeps ``G``'s node
    ids and gives every remaining edge the value one.  Raises
    ``GraphBlasAlgorithmException`` when only one input is a multigraph or the
    node sets differ, and ``NotImplementedError`` for multigraphs.
    """
    if G.is_multigraph() != H.is_multigraph():
        raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
    if G.is_multigraph():
        raise NotImplementedError("Not yet implemented for multigraphs")

    g_map = G._key_to_id
    h_map = H._key_to_id
    if g_map.keys() != h_map.keys():
        raise GraphBlasAlgorithmException("Node sets of graphs not equal")

    if g_map != h_map:
        # Same nodes but different ids: reorder H to follow G's id order.
        ordered = sorted(g_map, key=g_map.__getitem__)
        h_ids = np.array(H.list_to_ids(ordered), np.uint64)
        other = H._A[h_ids, h_ids].new()
    else:
        other = H._A

    # The complemented structural mask drops every position present in H.
    out = unary.one(G._A).new(mask=~other.S, name=name)
    return type(G)(out, key_to_id=g_map)


def symmetric_difference(G, H, *, name="symmetric_difference"):
    """Return the graph of edges that are in exactly one of ``G`` and ``H``.

    Both graphs must have the same node set; the result keeps ``G``'s node
    ids.  Raises ``GraphBlasAlgorithmException`` when only one input is a
    multigraph or the node sets differ, and ``NotImplementedError`` for
    multigraphs.
    """
    if G.is_multigraph() != H.is_multigraph():
        raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
    if G.is_multigraph():
        raise NotImplementedError("Not yet implemented for multigraphs")

    g_map = G._key_to_id
    h_map = H._key_to_id
    if g_map.keys() != h_map.keys():
        raise GraphBlasAlgorithmException("Node sets of graphs not equal")

    left = G._A
    if g_map != h_map:
        # Same nodes but different ids: reorder H to follow G's id order.
        ordered = sorted(g_map, key=g_map.__getitem__)
        h_ids = np.array(H.list_to_ids(ordered), np.uint64)
        right = H._A[h_ids, h_ids].new()
    else:
        right = H._A

    # Positions present in both graphs; these are excluded from the union.
    in_both = binary.pair[bool](left & right).new(name="mask")
    either = binary.pair(left | right, left_default=True, right_default=True)
    out = either.new(mask=~in_both.S, name=name)
    return type(G)(out, key_to_id=g_map)


def compose(G, H, *, name="compose"):
    """Return the composition of ``G`` and ``H``: all nodes and edges of both.

    Where both graphs have the same edge, ``H``'s value is used.  Node ids of
    ``G`` are preserved; nodes that appear only in ``H`` are appended after
    them, ordered by their id in ``H``.

    Raises ``GraphBlasAlgorithmException`` when only one input is a multigraph
    and ``NotImplementedError`` for multigraphs.
    """
    if G.is_multigraph() != H.is_multigraph():
        raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
    if G.is_multigraph():
        raise NotImplementedError("Not yet implemented for multigraphs")
    A = G._A
    B = H._A
    if G._key_to_id.keys() == H._key_to_id.keys():
        if G._key_to_id != H._key_to_id:
            # Need to perform a permutation
            keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__)
            ids = np.array(H.list_to_ids(keys), np.uint64)
            B = B[ids, ids].new()
        C = binary.second(A | B).new(name=name)
        key_to_id = G._key_to_id
    else:
        keys = sorted(G._key_to_id.keys() & H._key_to_id.keys(), key=G._key_to_id.__getitem__)
        newkeys = sorted(H._key_to_id.keys() - G._key_to_id.keys(), key=H._key_to_id.__getitem__)
        C = Matrix(
            dtypes.unify(A.dtype, B.dtype),
            A.nrows + B.nrows - len(keys),
            A.ncols + B.ncols - len(keys),
            name=name,
        )
        C[: A.nrows, : A.ncols] = A
        ids1 = np.array(G.list_to_ids(keys), np.uint64)
        ids2 = np.array(H.list_to_ids(keys), np.uint64)
        # Accumulate instead of overwriting: a plain sub-assign would delete
        # G's edges between shared nodes wherever H has no matching edge.
        # `second` keeps H's value where both graphs have the edge, matching
        # the equal-node-set branch above.
        C(binary.second)[ids1, ids1] = B[ids2, ids2]
        ids = np.array(H.list_to_ids(newkeys), np.uint64)
        # Edges among the nodes that only H has.
        C[A.nrows :, A.ncols :] = B[ids, ids]
        if keys and newkeys:
            # H's edges between shared nodes and H-only nodes (both directions).
            C[ids1, A.ncols :] = B[ids2, ids]
            C[A.nrows :, ids1] = B[ids, ids2]
        # Now make new `key_to_id`: H-only nodes occupy the rows appended after
        # G's, in `newkeys` order (their position in C, not their id in H).
        key_to_id = dict(zip(newkeys, range(A.nrows, A.nrows + len(newkeys))))
        key_to_id.update(G._key_to_id)
    return type(G)(C, key_to_id=key_to_id)


def full_join(G, H, rename=(), *, name="full_join"):
    """Return the union of ``G`` and ``H`` plus every edge between the two.

    The union is built by ``union`` (same ``rename`` handling and errors);
    the two off-diagonal blocks of its adjacency matrix are then set to True.
    """
    joined = union(G, H, rename, name=name)
    g_rows, g_cols = G._A.shape
    # Top-right block: edges from G's nodes to H's nodes.
    joined._A[:g_rows, g_cols:] = True
    # Bottom-left block: edges from H's nodes to G's nodes.
    joined._A[g_rows:, :g_cols] = True
    return joined
13 changes: 12 additions & 1 deletion graphblas_algorithms/algorithms/shortest_paths/dense.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
from graphblas import Matrix, Vector, binary, indexunary, replace, select
from graphblas.semiring import any_plus, any_second

from ..exceptions import GraphBlasAlgorithmException

__all__ = ["floyd_warshall", "floyd_warshall_predecessor_and_distance"]


def floyd_warshall(G, is_weighted=False):
    """Return only the distance result of the Floyd-Warshall computation.

    Delegates to ``floyd_warshall_predecessor_and_distance`` with predecessor
    computation disabled and returns the element at index 1 of its result.
    """
    result = floyd_warshall_predecessor_and_distance(
        G, is_weighted, compute_predecessors=False
    )
    return result[1]


def floyd_warshall_predecessor_and_distance(G, is_weighted=False, *, compute_predecessors=True):
def floyd_warshall_predecessor_and_distance(
G, is_weighted=False, *, compute_predecessors=True, permutation=None
):
# By using `offdiag` instead of `G._A`, we ensure that D will not become dense.
# Dense D may be better at times, but not including the diagonal will result in less work.
# Typically, Floyd-Warshall algorithms sets the diagonal of D to 0 at the beginning.
Expand All @@ -19,6 +23,13 @@ def floyd_warshall_predecessor_and_distance(G, is_weighted=False, *, compute_pre
nonempty_nodes = binary.pair(row_degrees & column_degrees).new(name="nonempty_nodes")
else:
A, nonempty_nodes = G.get_properties("U- degrees-")
if permutation is not None:
if len(permutation) != nonempty_nodes.size:
raise GraphBlasAlgorithmException(
"permutation must contain every node in G with no repeats."
)
A = A[permutation, permutation].new()
nonempty_nodes = nonempty_nodes[permutation].new(name="nonempty_nodes")

if A.dtype == bool or not is_weighted:
dtype = int
Expand Down
36 changes: 34 additions & 2 deletions graphblas_algorithms/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,16 @@ class Dispatcher:
number_of_isolates = nxapi.isolate.number_of_isolates
# Link Analysis
hits = nxapi.link_analysis.hits_alg.hits
google_matrix = nxapi.link_analysis.pagerank_alg.google_matrix
pagerank = nxapi.link_analysis.pagerank_alg.pagerank
# Operators
compose = nxapi.operators.binary.compose
difference = nxapi.operators.binary.difference
disjoint_union = nxapi.operators.binary.disjoint_union
full_join = nxapi.operators.binary.full_join
intersection = nxapi.operators.binary.intersection
symmetric_difference = nxapi.operators.binary.symmetric_difference
union = nxapi.operators.binary.union
# Reciprocity
overall_reciprocity = nxapi.overall_reciprocity
reciprocity = nxapi.reciprocity
Expand All @@ -60,6 +69,7 @@ class Dispatcher:
is_regular = nxapi.regular.is_regular
# Shortest Paths
floyd_warshall = nxapi.shortest_paths.dense.floyd_warshall
floyd_warshall_numpy = nxapi.shortest_paths.dense.floyd_warshall_numpy
floyd_warshall_predecessor_and_distance = (
nxapi.shortest_paths.dense.floyd_warshall_predecessor_and_distance
)
Expand Down Expand Up @@ -112,10 +122,14 @@ def convert_from_nx(graph, weight=None, *, name=None):

@staticmethod
def convert_to_nx(obj, *, name=None):
    """Convert a result object into its NetworkX-facing form.

    ``Graph`` instances are converted with ``to_networkx()``, ``Matrix``
    instances are densified with ``False`` as the fill value, and anything
    else is returned unchanged.  ``name`` is accepted but not used here.
    """
    from graphblas import Matrix

    from .classes import Graph

    if isinstance(obj, Graph):
        return obj.to_networkx()
    if isinstance(obj, Matrix):
        return obj.to_dense(fill_value=False)
    return obj

@staticmethod
Expand All @@ -127,8 +141,11 @@ def on_start_tests(items):

def key(testpath):
    """Turn ``"file.py:Class.test"`` or ``"file.py:test"`` into a lookup key.

    Returns ``(testname, frozenset(...))`` where the frozenset holds the file
    name and, when the path has one, the class name.  Module-level tests
    (no class component) are supported; a path with more than one component
    before the test name raises ``ValueError`` from the unpacking below.
    """
    filename, path = testpath.split(":")
    # Everything before the final dot-separated component is the (optional)
    # class name; the final component is the test function name.
    *names, testname = path.split(".")
    if names:
        [classname] = names
        return (testname, frozenset({classname, filename}))
    return (testname, frozenset({filename}))

# Reasons to skip tests
multi_attributed = "unable to handle multi-attributed graphs"
Expand All @@ -140,7 +157,22 @@ def key(testpath):
key("test_mst.py:TestBoruvka.test_attributes"): multi_attributed,
key("test_mst.py:TestBoruvka.test_weight_attribute"): multi_attributed,
key("test_dense.py:TestFloyd.test_zero_weight"): multidigraph,
key("test_dense_numpy.py:test_zero_weight"): multidigraph,
key("test_weighted.py:TestBellmanFordAndGoldbergRadzik.test_multigraph"): multigraph,
key("test_binary.py:test_compose_multigraph"): multigraph,
key("test_binary.py:test_difference_multigraph_attributes"): multigraph,
key("test_binary.py:test_disjoint_union_multigraph"): multigraph,
key("test_binary.py:test_full_join_multigraph"): multigraph,
key("test_binary.py:test_intersection_multigraph_attributes"): multigraph,
key(
"test_binary.py:test_intersection_multigraph_attributes_node_set_different"
): multigraph,
key("test_binary.py:test_symmetric_difference_multigraph"): multigraph,
key("test_binary.py:test_union_attributes"): multi_attributed,
# TODO: move failing assertion from `test_union_and_compose`
key("test_binary.py:test_union_and_compose"): multi_attributed,
key("test_binary.py:test_union_multigraph"): multigraph,
key("test_vf2pp.py:test_custom_multigraph4_different_labels"): multigraph,
}
for item in items:
kset = set(item.keywords)
Expand Down
Loading