diff --git a/README.md b/README.md index b3aca8d..32039fb 100644 --- a/README.md +++ b/README.md @@ -156,8 +156,17 @@ dispatch pattern shown above. - isolates - number_of_isolates - Link Analysis + - google_matrix - hits - pagerank +- Operators + - compose + - difference + - disjoint_union + - full_join + - intersection + - symmetric_difference + - union - Reciprocity - overall_reciprocity - reciprocity @@ -168,6 +177,7 @@ dispatch pattern shown above. - all_pairs_bellman_ford_path_length - all_pairs_shortest_path_length - floyd_warshall + - floyd_warshall_numpy - floyd_warshall_predecessor_and_distance - has_path - negative_edge_cycle diff --git a/graphblas_algorithms/algorithms/__init__.py b/graphblas_algorithms/algorithms/__init__.py index 303535a..37a47a3 100644 --- a/graphblas_algorithms/algorithms/__init__.py +++ b/graphblas_algorithms/algorithms/__init__.py @@ -10,6 +10,7 @@ from .dominating import * from .isolate import * from .link_analysis import * +from .operators import * from .reciprocity import * from .regular import * from .shortest_paths import * diff --git a/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py b/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py index 9b28fa3..518c09f 100644 --- a/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py +++ b/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py @@ -1,11 +1,12 @@ -from graphblas import Vector +import numpy as np +from graphblas import Matrix, Vector, binary, monoid from graphblas.semiring import plus_first, plus_times from graphblas_algorithms import Graph from graphblas_algorithms.algorithms._helpers import is_converged from graphblas_algorithms.algorithms.exceptions import ConvergenceFailure -__all__ = ["pagerank"] +__all__ = ["pagerank", "google_matrix"] def pagerank( @@ -98,3 +99,65 @@ def pagerank( x.name = name return x raise ConvergenceFailure(max_iter) + + +def google_matrix( + G: Graph, + alpha=0.85, + personalization=None, + nodelist=None, + dangling=None, + name="google_matrix", +) -> Matrix: + A = G._A + ids = G.list_to_ids(nodelist) + if ids is not None: + ids = np.array(ids, np.uint64) + A = A[ids, ids].new(float, name=name) + else: + A = A.dup(float, name=name) + N = A.nrows + if N == 0: + return A + + # Personalization vector or scalar + if personalization is None: + p = 1.0 / N + else: + if ids is not None: + personalization = personalization[ids].new(name="personalization") + denom = personalization.reduce().get(0) + if denom == 0: + raise ZeroDivisionError("personalization sums to 0") + p = (personalization / denom).new(mask=personalization.V, name="p") + + if ids is None or len(ids) == len(G): + nonempty_rows = G.get_property("any_rowwise+") # XXX: What about self-edges? 
+ else: + nonempty_rows = A.reduce_rowwise(monoid.any).new(name="nonempty_rows") + + is_dangling = nonempty_rows.nvals < N + if is_dangling: + empty_rows = (~nonempty_rows.S).new(name="empty_rows") + if dangling is not None: + if ids is not None: + dangling = dangling[ids].new(name="dangling") + dangling_weights = (1.0 / dangling.reduce().get(0) * dangling).new( + mask=dangling.V, name="dangling_weights" + ) + A << binary.first(empty_rows.outer(dangling_weights) | A) + elif personalization is None: + A << binary.first((p * empty_rows) | A) + else: + A << binary.first(empty_rows.outer(p) | A) + + scale = A.reduce_rowwise(monoid.plus).new(float) + scale << alpha / scale + A << scale * A + p *= 1 - alpha + if personalization is None: + # Add a scalar everywhere, which makes A dense + A(binary.plus)[:, :] = p + else: + A << A + p + return A diff --git a/graphblas_algorithms/algorithms/operators/__init__.py b/graphblas_algorithms/algorithms/operators/__init__.py new file mode 100644 index 0000000..6e308b5 --- /dev/null +++ b/graphblas_algorithms/algorithms/operators/__init__.py @@ -0,0 +1 @@ +from .binary import * diff --git a/graphblas_algorithms/algorithms/operators/binary.py b/graphblas_algorithms/algorithms/operators/binary.py new file mode 100644 index 0000000..b2b19ca --- /dev/null +++ b/graphblas_algorithms/algorithms/operators/binary.py @@ -0,0 +1,156 @@ +import numpy as np +from graphblas import Matrix, binary, dtypes, unary + +from ..exceptions import GraphBlasAlgorithmException + +__all__ = [ + "compose", + "difference", + "disjoint_union", + "full_join", + "intersection", + "symmetric_difference", + "union", +] + + +def union(G, H, rename=(), *, name="union"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + if rename: + prefix = rename[0] + if prefix is not None: + G = type(G)( + G._A, key_to_id={f"{prefix}{key}": val for key, val in G._key_to_id.items()} + ) + if len(rename) > 1: + prefix = rename[1] + if prefix is not None: + H = type(H)( + H._A, key_to_id={f"{prefix}{key}": val for key, val in H._key_to_id.items()} + ) + A = G._A + B = H._A + if not G._key_to_id.keys().isdisjoint(H._key_to_id.keys()): + raise GraphBlasAlgorithmException("The node sets of the graphs are not disjoint.") + C = Matrix(dtypes.unify(A.dtype, B.dtype), A.nrows + B.nrows, A.ncols + B.ncols, name=name) + C[: A.nrows, : A.ncols] = A + C[A.nrows :, A.ncols :] = B + offset = A.nrows + key_to_id = {key: val + offset for key, val in H._key_to_id.items()} + key_to_id.update(G._key_to_id) + return type(G)(C, key_to_id=key_to_id) + + +def disjoint_union(G, H, *, name="disjoint_union"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + A = G._A + B = H._A + C = Matrix(dtypes.unify(A.dtype, B.dtype), A.nrows + B.nrows, A.ncols + B.ncols, name=name) + C[: A.nrows, : A.ncols] = A + C[A.nrows :, A.ncols :] = B + return type(G)(C) + + +def intersection(G, H, *, name="intersection"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + keys = sorted(G._key_to_id.keys() & H._key_to_id.keys(), key=G._key_to_id.__getitem__) + 
ids = np.array(G.list_to_ids(keys), np.uint64) + A = G._A[ids, ids].new() + ids = np.array(H.list_to_ids(keys), np.uint64) + B = H._A[ids, ids].new(dtypes.unify(A.dtype, H._A.dtype), mask=A.S, name=name) + B << unary.one(B) + return type(G)(B, key_to_id=dict(zip(keys, range(len(keys))))) + + +def difference(G, H, *, name="difference"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + if G._key_to_id.keys() != H._key_to_id.keys(): + raise GraphBlasAlgorithmException("Node sets of graphs not equal") + A = G._A + if G._key_to_id == H._key_to_id: + B = H._A + else: + # Need to perform a permutation + keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__) + ids = np.array(H.list_to_ids(keys), np.uint64) + B = H._A[ids, ids].new() + C = unary.one(A).new(mask=~B.S, name=name) + return type(G)(C, key_to_id=G._key_to_id) + + +def symmetric_difference(G, H, *, name="symmetric_difference"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + if G._key_to_id.keys() != H._key_to_id.keys(): + raise GraphBlasAlgorithmException("Node sets of graphs not equal") + A = G._A + if G._key_to_id == H._key_to_id: + B = H._A + else: + # Need to perform a permutation + keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__) + ids = np.array(H.list_to_ids(keys), np.uint64) + B = H._A[ids, ids].new() + Mask = binary.pair[bool](A & B).new(name="mask") + C = binary.pair(A | B, left_default=True, right_default=True).new(mask=~Mask.S, name=name) + return type(G)(C, key_to_id=G._key_to_id) + + +def compose(G, H, *, name="compose"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + A = G._A + B = H._A + if G._key_to_id.keys() == H._key_to_id.keys(): + if G._key_to_id != H._key_to_id: + # Need to perform a permutation + keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__) + ids = np.array(H.list_to_ids(keys), np.uint64) + B = B[ids, ids].new() + C = binary.second(A | B).new(name=name) + key_to_id = G._key_to_id + else: + keys = sorted(G._key_to_id.keys() & H._key_to_id.keys(), key=G._key_to_id.__getitem__) + B = H._A + C = Matrix( + dtypes.unify(A.dtype, B.dtype), + A.nrows + B.nrows - len(keys), + A.ncols + B.ncols - len(keys), + name=name, + ) + C[: A.nrows, : A.ncols] = A + ids1 = np.array(G.list_to_ids(keys), np.uint64) + ids2 = np.array(H.list_to_ids(keys), np.uint64) + C[ids1, ids1] = B[ids2, ids2] + newkeys = sorted(H._key_to_id.keys() - G._key_to_id.keys(), key=H._key_to_id.__getitem__) + ids = np.array(H.list_to_ids(newkeys), np.uint64) + C[A.nrows :, A.ncols :] = B[ids, ids] + # Now make new `key_to_id` + ids += A.nrows + key_to_id = dict(zip(newkeys, ids.tolist())) + key_to_id.update(G._key_to_id) + return type(G)(C, key_to_id=key_to_id) + + +def full_join(G, H, rename=(), *, name="full_join"): + rv = union(G, H, rename, name=name) + nrows, ncols = G._A.shape + rv._A[:nrows, ncols:] = True + rv._A[nrows:, :ncols] = True + return rv diff --git a/graphblas_algorithms/algorithms/shortest_paths/dense.py b/graphblas_algorithms/algorithms/shortest_paths/dense.py index 94282d0..394d1b4 100644 --- 
a/graphblas_algorithms/algorithms/shortest_paths/dense.py +++ b/graphblas_algorithms/algorithms/shortest_paths/dense.py @@ -1,6 +1,8 @@ from graphblas import Matrix, Vector, binary, indexunary, replace, select from graphblas.semiring import any_plus, any_second +from ..exceptions import GraphBlasAlgorithmException + __all__ = ["floyd_warshall", "floyd_warshall_predecessor_and_distance"] @@ -8,7 +10,9 @@ def floyd_warshall(G, is_weighted=False): return floyd_warshall_predecessor_and_distance(G, is_weighted, compute_predecessors=False)[1] -def floyd_warshall_predecessor_and_distance(G, is_weighted=False, *, compute_predecessors=True): +def floyd_warshall_predecessor_and_distance( + G, is_weighted=False, *, compute_predecessors=True, permutation=None +): # By using `offdiag` instead of `G._A`, we ensure that D will not become dense. # Dense D may be better at times, but not including the diagonal will result in less work. # Typically, Floyd-Warshall algorithms sets the diagonal of D to 0 at the beginning. @@ -19,6 +23,13 @@ def floyd_warshall_predecessor_and_distance(G, is_weighted=False, *, compute_pre nonempty_nodes = binary.pair(row_degrees & column_degrees).new(name="nonempty_nodes") else: A, nonempty_nodes = G.get_properties("U- degrees-") + if permutation is not None: + if len(permutation) != nonempty_nodes.size: + raise GraphBlasAlgorithmException( + "permutation must contain every node in G with no repeats." + ) + A = A[permutation, permutation].new() + nonempty_nodes = nonempty_nodes[permutation].new(name="nonempty_nodes") if A.dtype == bool or not is_weighted: dtype = int diff --git a/graphblas_algorithms/interface.py b/graphblas_algorithms/interface.py index d8430e5..206d19c 100644 --- a/graphblas_algorithms/interface.py +++ b/graphblas_algorithms/interface.py @@ -51,7 +51,16 @@ class Dispatcher: number_of_isolates = nxapi.isolate.number_of_isolates # Link Analysis hits = nxapi.link_analysis.hits_alg.hits + google_matrix = nxapi.link_analysis.pagerank_alg.google_matrix pagerank = nxapi.link_analysis.pagerank_alg.pagerank + # Operators + compose = nxapi.operators.binary.compose + difference = nxapi.operators.binary.difference + disjoint_union = nxapi.operators.binary.disjoint_union + full_join = nxapi.operators.binary.full_join + intersection = nxapi.operators.binary.intersection + symmetric_difference = nxapi.operators.binary.symmetric_difference + union = nxapi.operators.binary.union # Reciprocity overall_reciprocity = nxapi.overall_reciprocity reciprocity = nxapi.reciprocity @@ -60,6 +69,7 @@ class Dispatcher: is_regular = nxapi.regular.is_regular # Shortest Paths floyd_warshall = nxapi.shortest_paths.dense.floyd_warshall + floyd_warshall_numpy = nxapi.shortest_paths.dense.floyd_warshall_numpy floyd_warshall_predecessor_and_distance = ( nxapi.shortest_paths.dense.floyd_warshall_predecessor_and_distance ) @@ -112,10 +122,14 @@ def convert_from_nx(graph, weight=None, *, name=None): @staticmethod def convert_to_nx(obj, *, name=None): + from graphblas import Matrix + from .classes import Graph if isinstance(obj, Graph): obj = obj.to_networkx() + elif isinstance(obj, Matrix): + obj = obj.to_dense(fill_value=False) return obj @staticmethod @@ -127,8 +141,11 @@ def on_start_tests(items): def key(testpath): filename, path = testpath.split(":") - classname, testname = path.split(".") - return (testname, frozenset({classname, filename})) + *names, testname = path.split(".") + if names: + [classname] = names + return (testname, frozenset({classname, filename})) + return (testname, 
frozenset({filename})) # Reasons to skip tests multi_attributed = "unable to handle multi-attributed graphs" @@ -140,7 +157,22 @@ def key(testpath): key("test_mst.py:TestBoruvka.test_attributes"): multi_attributed, key("test_mst.py:TestBoruvka.test_weight_attribute"): multi_attributed, key("test_dense.py:TestFloyd.test_zero_weight"): multidigraph, + key("test_dense_numpy.py:test_zero_weight"): multidigraph, key("test_weighted.py:TestBellmanFordAndGoldbergRadzik.test_multigraph"): multigraph, + key("test_binary.py:test_compose_multigraph"): multigraph, + key("test_binary.py:test_difference_multigraph_attributes"): multigraph, + key("test_binary.py:test_disjoint_union_multigraph"): multigraph, + key("test_binary.py:test_full_join_multigraph"): multigraph, + key("test_binary.py:test_intersection_multigraph_attributes"): multigraph, + key( + "test_binary.py:test_intersection_multigraph_attributes_node_set_different" + ): multigraph, + key("test_binary.py:test_symmetric_difference_multigraph"): multigraph, + key("test_binary.py:test_union_attributes"): multi_attributed, + # TODO: move failing assertion from `test_union_and_compose` + key("test_binary.py:test_union_and_compose"): multi_attributed, + key("test_binary.py:test_union_multigraph"): multigraph, + key("test_vf2pp.py:test_custom_multigraph4_different_labels"): multigraph, } for item in items: kset = set(item.keywords) diff --git a/graphblas_algorithms/nxapi/__init__.py b/graphblas_algorithms/nxapi/__init__.py index fe5ba87..5ddc1fa 100644 --- a/graphblas_algorithms/nxapi/__init__.py +++ b/graphblas_algorithms/nxapi/__init__.py @@ -9,6 +9,7 @@ from .dominating import * from .isolate import * from .link_analysis import * +from .operators import * from .reciprocity import * from .regular import * from .shortest_paths import * @@ -23,6 +24,7 @@ from . import community from . import components from . import link_analysis +from . import operators from . import shortest_paths from . import tournament from . 
import traversal diff --git a/graphblas_algorithms/nxapi/link_analysis/pagerank_alg.py b/graphblas_algorithms/nxapi/link_analysis/pagerank_alg.py index d40506f..22e977e 100644 --- a/graphblas_algorithms/nxapi/link_analysis/pagerank_alg.py +++ b/graphblas_algorithms/nxapi/link_analysis/pagerank_alg.py @@ -3,7 +3,7 @@ from ..exception import PowerIterationFailedConvergence -_all = ["pagerank"] +_all = ["pagerank", "google_matrix"] def pagerank( @@ -43,3 +43,21 @@ def pagerank( raise PowerIterationFailedConvergence(*e.args) from e else: return G.vector_to_nodemap(result, fill_value=0.0) + + +def google_matrix( + G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None +): + G = to_graph(G, weight=weight, dtype=float) + p = G.dict_to_vector(personalization, dtype=float, name="personalization") + if dangling is not None and G.get_property("row_degrees+").nvals < len(G): + dangling_weights = G.dict_to_vector(dangling, dtype=float, name="dangling") + else: + dangling_weights = None + return algorithms.google_matrix( + G, + alpha=alpha, + personalization=p, + nodelist=nodelist, + dangling=dangling_weights, + ) diff --git a/graphblas_algorithms/nxapi/operators/__init__.py b/graphblas_algorithms/nxapi/operators/__init__.py new file mode 100644 index 0000000..6e308b5 --- /dev/null +++ b/graphblas_algorithms/nxapi/operators/__init__.py @@ -0,0 +1 @@ +from .binary import * diff --git a/graphblas_algorithms/nxapi/operators/binary.py b/graphblas_algorithms/nxapi/operators/binary.py new file mode 100644 index 0000000..82e8f08 --- /dev/null +++ b/graphblas_algorithms/nxapi/operators/binary.py @@ -0,0 +1,77 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from ..exception import NetworkXError + +__all__ = [ + "compose", + "difference", + "disjoint_union", + "full_join", + "intersection", + "symmetric_difference", + "union", +] + + +def union(G, H, rename=()): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.union(G, H, rename=rename) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def disjoint_union(G, H): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.disjoint_union(G, H) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def intersection(G, H): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.intersection(G, H) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def difference(G, H): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.difference(G, H) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def symmetric_difference(G, H): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.symmetric_difference(G, H) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def compose(G, H): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.compose(G, H) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def full_join(G, H, rename=()): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.full_join(G, H, rename=rename) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e diff --git a/graphblas_algorithms/nxapi/shortest_paths/dense.py 
b/graphblas_algorithms/nxapi/shortest_paths/dense.py index 4b62891..cc86eb7 100644 --- a/graphblas_algorithms/nxapi/shortest_paths/dense.py +++ b/graphblas_algorithms/nxapi/shortest_paths/dense.py @@ -1,7 +1,11 @@ +import numpy as np + from graphblas_algorithms import algorithms from graphblas_algorithms.classes.digraph import to_graph -__all__ = ["floyd_warshall", "floyd_warshall_predecessor_and_distance"] +from ..exception import NetworkXError + +__all__ = ["floyd_warshall", "floyd_warshall_numpy", "floyd_warshall_predecessor_and_distance"] def floyd_warshall(G, weight="weight"): @@ -17,3 +21,19 @@ def floyd_warshall_predecessor_and_distance(G, weight="weight"): G.matrix_to_nodenodemap(P, values_are_keys=True), G.matrix_to_nodenodemap(D, fill_value=float("inf")), ) + + +def floyd_warshall_numpy(G, nodelist=None, weight="weight"): + G = to_graph(G, weight=weight) + if nodelist is not None: + if not (len(nodelist) == len(G) == len(set(nodelist))): + raise NetworkXError("nodelist must contain every node in G with no repeats.") + permutation = np.array(G.list_to_ids(nodelist), np.uint64) + else: + permutation = None + try: + return algorithms.floyd_warshall_predecessor_and_distance( + G, is_weighted=weight is not None, compute_predecessors=False, permutation=permutation + )[1] + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e diff --git a/pyproject.toml b/pyproject.toml index 7811266..df17600 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,6 +99,7 @@ packages = [ "graphblas_algorithms.algorithms.community", "graphblas_algorithms.algorithms.components", "graphblas_algorithms.algorithms.link_analysis", + "graphblas_algorithms.algorithms.operators", "graphblas_algorithms.algorithms.shortest_paths", "graphblas_algorithms.algorithms.tests", "graphblas_algorithms.algorithms.traversal", @@ -108,6 +109,7 @@ packages = [ "graphblas_algorithms.nxapi.community", "graphblas_algorithms.nxapi.components", "graphblas_algorithms.nxapi.link_analysis", + "graphblas_algorithms.nxapi.operators", "graphblas_algorithms.nxapi.shortest_paths", "graphblas_algorithms.nxapi.tests", "graphblas_algorithms.nxapi.traversal",
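
Reviewer note (not part of the patch): a sanity-check sketch of what the new google_matrix computes -- alpha times the row-stochastic adjacency (dangling rows replaced by the personalization or dangling vector) plus (1 - alpha) times the personalization broadcast to every row. The toy graph and the comparison against NetworkX below are illustrative assumptions; DiGraph.from_networkx and Matrix.to_dense are existing library APIs.

    import networkx as nx
    import numpy as np

    from graphblas_algorithms import DiGraph, algorithms

    # Toy directed graph; every node has an out-edge, so the dangling branch is not hit.
    nxg = nx.DiGraph([(0, 1), (1, 2), (2, 0), (0, 2)])
    G = DiGraph.from_networkx(nxg, weight=None)

    M = algorithms.google_matrix(G, alpha=0.85)  # graphblas Matrix, N x N, dense
    expected = np.asarray(nx.google_matrix(nxg, alpha=0.85))
    np.testing.assert_allclose(M.to_dense(fill_value=0.0), expected)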
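
Usage sketch for the new union and disjoint_union (toy graphs, illustrative only). union requires disjoint node sets, so overlapping labels need the rename prefixes, as in networkx.union; internally G occupies the top-left block of the combined matrix and H the bottom-right block, with H's ids offset by the number of nodes in G.

    import networkx as nx

    from graphblas_algorithms import Graph, algorithms

    G = Graph.from_networkx(nx.path_graph(3))  # nodes 0, 1, 2
    H = Graph.from_networkx(nx.path_graph(3))  # same labels, so rename is required

    U = algorithms.union(G, H, rename=("g-", "h-"))
    print(sorted(U._key_to_id))  # ['g-0', 'g-1', 'g-2', 'h-0', 'h-1', 'h-2']

    # disjoint_union skips renaming and relabels nodes as 0 .. len(G) + len(H) - 1.
    D = algorithms.disjoint_union(G, H)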
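
intersection restricts both graphs to their common node set, keeps only edges present in both, and resets edge values to 1 (via unary.one), matching NetworkX's behavior of not copying edge data; difference and symmetric_difference still require equal node sets and raise otherwise. A small illustrative sketch (toy graphs, not from the patch):

    import networkx as nx

    from graphblas_algorithms import Graph, algorithms

    G = Graph.from_networkx(nx.Graph([(0, 1), (1, 2), (2, 3)]))
    H = Graph.from_networkx(nx.Graph([(1, 2), (2, 3), (3, 4)]))

    # Common nodes {1, 2, 3}; surviving edges (1, 2) and (2, 3), values set to 1.
    I = algorithms.intersection(G, H)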
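
Usage sketch for the new floyd_warshall_numpy wrapper (example graph and labels are illustrative). nodelist must contain every node exactly once and fixes the row/column order of the result through the new permutation argument to floyd_warshall_predecessor_and_distance; the wrapper itself returns a graphblas Matrix, which the Dispatcher's convert_to_nx turns into a dense numpy array for NetworkX callers.

    import networkx as nx

    from graphblas_algorithms.nxapi.shortest_paths.dense import floyd_warshall_numpy

    nxg = nx.Graph()
    nxg.add_weighted_edges_from([("a", "b", 1.0), ("b", "c", 2.0), ("a", "c", 5.0)])

    # Rows/columns of D follow the order given in nodelist: c, a, b.
    D = floyd_warshall_numpy(nxg, nodelist=["c", "a", "b"], weight="weight")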
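
The convert_to_nx change means matrix-valued results (google_matrix, floyd_warshall_numpy) are handed back to NetworkX as dense numpy arrays rather than graphblas Matrix objects. A minimal sketch of that conversion path, using an illustrative toy Matrix:

    from graphblas import Matrix

    from graphblas_algorithms.interface import Dispatcher

    M = Matrix.from_coo([0, 1], [1, 0], [2.5, 3.5], nrows=2, ncols=2)

    # Graph objects go through to_networkx(); a bare Matrix is densified, with
    # missing entries filled by False (0 after dtype coercion).
    arr = Dispatcher.convert_to_nx(M)  # 2x2 numpy array with zeros on the diagonal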