diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 862dfcd..94612c9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -20,12 +20,12 @@ repos:
       - id: mixed-line-ending
       - id: trailing-whitespace
   - repo: https://github.com/abravalheri/validate-pyproject
-    rev: v0.11
+    rev: v0.12.1
     hooks:
       - id: validate-pyproject
         name: Validate pyproject.toml
   - repo: https://github.com/myint/autoflake
-    rev: v2.0.0
+    rev: v2.0.1
     hooks:
       - id: autoflake
         args: [--in-place]
@@ -44,7 +44,7 @@ repos:
       - id: auto-walrus
        args: [--line-length, "100"]
   - repo: https://github.com/psf/black
-    rev: 22.12.0
+    rev: 23.1.0
     hooks:
       - id: black
         args: [--target-version=py38]
diff --git a/graphblas_algorithms/algorithms/shortest_paths/dense.py b/graphblas_algorithms/algorithms/shortest_paths/dense.py
index f53814f..94282d0 100644
--- a/graphblas_algorithms/algorithms/shortest_paths/dense.py
+++ b/graphblas_algorithms/algorithms/shortest_paths/dense.py
@@ -1,47 +1,100 @@
-from graphblas import Matrix, Vector, binary
-from graphblas.select import offdiag
-from graphblas.semiring import any_plus
+from graphblas import Matrix, Vector, binary, indexunary, replace, select
+from graphblas.semiring import any_plus, any_second
 
-__all__ = ["floyd_warshall"]
+__all__ = ["floyd_warshall", "floyd_warshall_predecessor_and_distance"]
 
 
 def floyd_warshall(G, is_weighted=False):
+    return floyd_warshall_predecessor_and_distance(G, is_weighted, compute_predecessors=False)[1]
+
+
+def floyd_warshall_predecessor_and_distance(G, is_weighted=False, *, compute_predecessors=True):
     # By using `offdiag` instead of `G._A`, we ensure that D will not become dense.
     # Dense D may be better at times, but not including the diagonal will result in less work.
     # Typically, Floyd-Warshall algorithms sets the diagonal of D to 0 at the beginning.
     # This is unnecessary with sparse matrices, and we set the diagonal to 0 at the end.
     # We also don't iterate over index `i` if either row i or column i are empty.
-    if G.is_directed():
+    if is_directed := G.is_directed():
         A, row_degrees, column_degrees = G.get_properties("offdiag row_degrees- column_degrees-")
         nonempty_nodes = binary.pair(row_degrees & column_degrees).new(name="nonempty_nodes")
     else:
-        A, nonempty_nodes = G.get_properties("offdiag degrees-")
+        A, nonempty_nodes = G.get_properties("U- degrees-")
     if A.dtype == bool or not is_weighted:
         dtype = int
     else:
         dtype = A.dtype
     n = A.nrows
-    D = Matrix(dtype, nrows=n, ncols=n, name="floyd_warshall")
+    D = Matrix(dtype, nrows=n, ncols=n, name="floyd_warshall_dist")
     if is_weighted:
         D << A
     else:
         D(A.S) << 1  # Like `D << unary.one[int](A)`
     del A
     Row = Matrix(dtype, nrows=1, ncols=n, name="Row")
-    Col = Matrix(dtype, nrows=n, ncols=1, name="Col")
+    if is_directed:
+        Col = Matrix(dtype, nrows=n, ncols=1, name="Col")
+    else:
+        Col = None
     Outer = Matrix(dtype, nrows=n, ncols=n, name="Outer")
+    if compute_predecessors:
+        Mask = Matrix(bool, nrows=n, ncols=n, name="Mask")
+        P = indexunary.rowindex(D).new(name="floyd_warshall_pred")
+        if P.dtype == dtype:
+            P_row = Row
+        else:
+            P_row = Matrix(P.dtype, nrows=1, ncols=n, name="P_row")
+    else:
+        Mask = P = P_row = None
+
     for i in nonempty_nodes:
-        Col << D[:, [i]]
         Row << D[[i], :]
+        if is_directed:
+            Col << D[:, [i]]
+        else:
+            Row(binary.any) << D.T[[i], :]
+            Col = Row.T
         Outer << any_plus(Col @ Row)  # Like `col.outer(row, binary.plus)`
-        D(binary.min) << offdiag(Outer)
+
+        if not compute_predecessors:
+            # It is faster (approx 10%-30%) to use a mask as is done below when computing
+            # predecessors, but we choose to use less memory here by not using a mask.
+            if is_directed:
+                D(binary.min) << select.offdiag(Outer)
+            else:
+                D(binary.min) << select.triu(Outer, 1)
+        else:
+            # Update Outer to only include off-diagonal values that will update D and P.
+            if is_directed:
+                Mask << indexunary.offdiag(Outer)
+            else:
+                Mask << indexunary.triu(Outer, 1)
+            Mask(binary.second) << binary.lt(Outer & D)
+            Outer(Mask.V, replace) << Outer
+
+            # Update distances; like `D(binary.min) << offdiag(any_plus(Col @ Row))`
+            D(Outer.S) << Outer
+
+            # Broadcast predecessors in P_row to updated values
+            P_row << P[[i], :]
+            if not is_directed:
+                P_row(binary.any) << P.T[[i], :]
+                Col = P_row.T
+            P(Outer.S) << any_second(Col @ P_row)
+    del Outer, Mask, Col, Row, P_row
+
+    if not is_directed:
+        # Symmetrize the results.
+        # It may be nice to be able to return these as upper-triangular.
+        D(binary.any) << D.T
+        if compute_predecessors:
+            P(binary.any) << P.T
+
     # Set diagonal values to 0 (this way seems fast).
     # The missing values are implied to be infinity, so we set diagonals explicitly to 0.
-    mask = Vector(bool, size=n, name="mask")
-    mask << True
-    Mask = mask.diag(name="Mask")
-    D(Mask.S) << 0
-    return D
+    diag_mask = Vector(bool, size=n, name="diag_mask")
+    diag_mask << True
+    Diag_mask = diag_mask.diag(name="Diag_mask")
+    D(Diag_mask.S) << 0
+
+    return P, D
diff --git a/graphblas_algorithms/classes/_utils.py b/graphblas_algorithms/classes/_utils.py
index 237dcf3..629316a 100644
--- a/graphblas_algorithms/classes/_utils.py
+++ b/graphblas_algorithms/classes/_utils.py
@@ -109,26 +109,27 @@ def set_to_vector(self, nodes, dtype=bool, *, ignore_extra=False, size=None, nam
     return Vector.from_coo(index, True, size=size, dtype=dtype, name=name)
 
 
-def vector_to_dict(self, v, *, mask=None, fillvalue=None):
+def vector_to_dict(self, v, *, mask=None, fill_value=None):
     if mask is not None:
-        if fillvalue is not None and v.nvals < mask.parent.nvals:
-            v(mask, binary.first) << fillvalue
-    elif fillvalue is not None and v.nvals < v.size:
-        v(mask=~v.S) << fillvalue
+        if fill_value is not None and v.nvals < mask.parent.nvals:
+            v(mask, binary.first) << fill_value
+    elif fill_value is not None and v.nvals < v.size:
+        v(mask=~v.S) << fill_value
     id_to_key = self.id_to_key
     return {id_to_key[index]: value for index, value in zip(*v.to_coo(sort=False))}
 
 
-def vector_to_nodemap(self, v, *, mask=None, fillvalue=None):
+def vector_to_nodemap(self, v, *, mask=None, fill_value=None, values_are_keys=False):
     from .nodemap import NodeMap
 
     if mask is not None:
-        if fillvalue is not None and v.nvals < mask.parent.nvals:
-            v(mask, binary.first) << fillvalue
-    elif fillvalue is not None and v.nvals < v.size:
-        v(mask=~v.S) << fillvalue
+        if fill_value is not None and v.nvals < mask.parent.nvals:
+            v(mask, binary.first) << fill_value
+        fill_value = None
 
-    rv = NodeMap(v, key_to_id=self._key_to_id)
+    rv = NodeMap(
+        v, fill_value=fill_value, values_are_keys=values_are_keys, key_to_id=self._key_to_id
+    )
     rv._id_to_key = self._id_to_key
     return rv
@@ -147,7 +148,25 @@ def vector_to_set(self, v):
     return {id_to_key[index] for index in indices}
 
 
-def matrix_to_dicts(self, A, *, use_row_index=False, use_column_index=False):
+def matrix_to_nodenodemap(self, A, *, fill_value=None, values_are_keys=False):
+    from .nodemap import NodeNodeMap
+
+    rv = NodeNodeMap(
+        A, fill_value=fill_value, values_are_keys=values_are_keys, key_to_id=self._key_to_id
+    )
+    rv._id_to_key = self._id_to_key
+    return rv
+
+
+def matrix_to_vectornodemap(self, A):
+    from .nodemap import VectorNodeMap
+
+    rv = VectorNodeMap(A, key_to_id=self._key_to_id)
+    rv._id_to_key = self._id_to_key
+    return rv
+
+
+def matrix_to_dicts(self, A, *, use_row_index=False, use_column_index=False, values_are_keys=False):
     """Convert a Matrix to a dict of dicts of the form ``{row: {col: val}}``
 
     Use ``use_row_index=True`` to return the row index as keys in the dict,
@@ -167,6 +186,8 @@ def matrix_to_dicts(self, A, *, use_row_index=False, use_column_index=False):
     indptr = d["indptr"]
     values = d["values"].tolist()
     id_to_key = self.id_to_key
+    if values_are_keys:
+        values = [id_to_key[val] for val in values]
     it = zip(rows, np.lib.stride_tricks.sliding_window_view(indptr, 2).tolist())
     if use_row_index and use_column_index:
         return {
diff --git a/graphblas_algorithms/classes/digraph.py b/graphblas_algorithms/classes/digraph.py
index 167bd29..0984dcd 100644
--- a/graphblas_algorithms/classes/digraph.py
+++ b/graphblas_algorithms/classes/digraph.py
@@ -569,6 +569,8 @@ def __init__(self, incoming_graph_data=None, *, key_to_id=None, **attr):
     list_to_mask = _utils.list_to_mask
     list_to_ids = _utils.list_to_ids
     matrix_to_dicts = _utils.matrix_to_dicts
+    matrix_to_nodenodemap = _utils.matrix_to_nodenodemap
+    matrix_to_vectornodemap = _utils.matrix_to_vectornodemap
     set_to_vector = _utils.set_to_vector
     to_networkx = _utils.to_networkx
     vector_to_dict = _utils.vector_to_dict
diff --git a/graphblas_algorithms/classes/graph.py b/graphblas_algorithms/classes/graph.py
index a8f14f3..65e4cea 100644
--- a/graphblas_algorithms/classes/graph.py
+++ b/graphblas_algorithms/classes/graph.py
@@ -275,6 +275,8 @@ def __init__(self, incoming_graph_data=None, *, key_to_id=None, **attr):
     list_to_ids = _utils.list_to_ids
     list_to_keys = _utils.list_to_keys
     matrix_to_dicts = _utils.matrix_to_dicts
+    matrix_to_nodenodemap = _utils.matrix_to_nodenodemap
+    matrix_to_vectornodemap = _utils.matrix_to_vectornodemap
     set_to_vector = _utils.set_to_vector
     to_networkx = _utils.to_networkx
     vector_to_dict = _utils.vector_to_dict
diff --git a/graphblas_algorithms/classes/nodemap.py b/graphblas_algorithms/classes/nodemap.py
index 7878772..ab24fdc 100644
--- a/graphblas_algorithms/classes/nodemap.py
+++ b/graphblas_algorithms/classes/nodemap.py
@@ -6,13 +6,15 @@
 
 
 class NodeMap(MutableMapping):
-    def __init__(self, v, *, key_to_id=None):
+    def __init__(self, v, *, fill_value=None, values_are_keys=False, key_to_id=None):
         self.vector = v
         if key_to_id is None:
             self._key_to_id = {i: i for i in range(v.size)}
         else:
             self._key_to_id = key_to_id
         self._id_to_key = None
+        self._fill_value = fill_value
+        self._values_are_keys = values_are_keys
 
     id_to_key = property(_utils.id_to_key)
     # get_property = _utils.get_property
@@ -39,38 +41,60 @@ def __delitem__(self, key):
     def __getitem__(self, key):
         idx = self._key_to_id[key]
         if (rv := self.vector.get(idx)) is not None:
+            if self._values_are_keys:
+                return self.id_to_key[rv]
             return rv
+        if self._fill_value is not None:
+            return self._fill_value
         raise KeyError(key)
 
     def __iter__(self):
+        if self._fill_value is not None:
+            return iter(self._key_to_id)
         # Slow if we iterate over one; fast if we iterate over all
         return map(
             self.id_to_key.__getitem__, self.vector.to_coo(values=False, sort=False)[0].tolist()
         )
 
     def __len__(self):
+        if self._fill_value is not None:
+            return len(self._key_to_id)
         return self.vector.nvals
 
     def __setitem__(self, key, val):
         idx = self._key_to_id[key]
+        if self._values_are_keys:
+            val = self._key_to_id[val]
         self.vector[idx] = val
 
     # Override other MutableMapping methods
     def __contains__(self, key):
         idx = self._key_to_id[key]
-        return idx in self.vector
+        return self._fill_value is not None or idx in self.vector
 
     def __eq__(self, other):
         if isinstance(other, NodeMap):
-            return self.vector.isequal(other.vector) and self._key_to_id == other._key_to_id
+            return (
+                self._values_are_keys == other._values_are_keys
+                and self._fill_value == other._fill_value
+                and self.vector.isequal(other.vector)
+                and self._key_to_id == other._key_to_id
+            )
         return super().__eq__(other)
 
     def clear(self):
         self.vector.clear()
+        self._fill_value = None
 
     def get(self, key, default=None):
         idx = self._key_to_id[key]
-        return self.vector.get(idx, default)
+        if (rv := self.vector.get(idx)) is None:
+            if self._fill_value is not None:
+                return self._fill_value
+            return default
+        if self._values_are_keys:
+            return self.id_to_key[rv]
+        return rv
 
     # items
     # keys
@@ -83,13 +107,20 @@ def popitem(self):
         except StopIteration:
             raise KeyError from None
         del v[idx]
+        if self._values_are_keys:
+            value = self.id_to_key[value]
         return self.id_to_key[idx], value
 
     def setdefault(self, key, default=None):
         idx = self._key_to_id[key]
         if (value := self.vector.get(idx)) is not None:
+            if self._values_are_keys:
+                return self.id_to_key[value]
             return value
-        self.vector[idx] = default
+        if self._fill_value is not None:
+            return self._fill_value
+        if default is not None:
+            self.vector[idx] = default
         return default
 
     # update
@@ -272,3 +303,141 @@ def popitem(self):
     # setdefault
     # update
     # values
+
+
+class NodeNodeMap(MutableMapping):
+    def __init__(self, A, *, fill_value=None, values_are_keys=False, key_to_id=None):
+        self.matrix = A
+        if key_to_id is None:
+            self._key_to_id = {i: i for i in range(A.size)}
+        else:
+            self._key_to_id = key_to_id
+        self._id_to_key = None
+        self._rows = None
+        self._fill_value = fill_value
+        self._values_are_keys = values_are_keys
+
+    def _get_rows(self):
+        if self._rows is None:
+            self._rows = self.matrix.reduce_rowwise(monoid.any).new()
+            self._rows(self._rows.S) << 1  # Make iso-valued
+        return self._rows
+
+    id_to_key = property(_utils.id_to_key)
+    # get_property = _utils.get_property
+    # get_properties = _utils.get_properties
+    dict_to_vector = _utils.dict_to_vector
+    list_to_vector = _utils.list_to_vector
+    list_to_mask = _utils.list_to_mask
+    list_to_ids = _utils.list_to_ids
+    list_to_keys = _utils.list_to_keys
+    matrix_to_dicts = _utils.matrix_to_dicts
+    set_to_vector = _utils.set_to_vector
+    # to_networkx = _utils.to_networkx
+    vector_to_dict = _utils.vector_to_dict
+    vector_to_nodemap = _utils.vector_to_nodemap
+    vector_to_nodeset = _utils.vector_to_nodeset
+    vector_to_set = _utils.vector_to_set
+    # _cacheit = _utils._cacheit
+
+    # Requirements for MutableMapping
+    def __delitem__(self, key):
+        idx = self._key_to_id[key]
+        del self.matrix[idx, :]
+        if self._rows is not None:
+            del self._rows[idx]
+
+    def __getitem__(self, key):
+        idx = self._key_to_id[key]
+        if self._fill_value is None and self._get_rows().get(idx) is None:
+            raise KeyError(key)
+        return self.vector_to_nodemap(
+            self.matrix[idx, :].new(),
+            fill_value=self._fill_value,
+            values_are_keys=self._values_are_keys,
+        )
+
+    def __iter__(self):
+        if self._fill_value is not None:
+            return iter(self._key_to_id)
+        # Slow if we iterate over one; fast if we iterate over all
+        return map(
+            self.id_to_key.__getitem__,
+            self._get_rows().to_coo(values=False, sort=False)[0].tolist(),
+        )
+
+    def __len__(self):
+        if self._fill_value is not None:
+            return len(self._key_to_id)
+        return self._get_rows().nvals
+
+    def __setitem__(self, key, val):
+        idx = self._key_to_id[key]
+        if isinstance(val, NodeMap):
+            # TODO: check val._key_to_id?
+            val = val.vector
+        elif isinstance(val, dict):
+            val = Vector.from_dict(val, self.matrix.dtype, size=self.matrix.ncols)
+        else:
+            raise TypeError()
+        if val.nvals == 0:
+            del self.matrix[idx, :]
+            if self._rows is not None:
+                del self._rows[idx]
+        else:
+            self.matrix[idx, :] = val
+            if self._rows is not None:
+                self._rows[idx] = 1
+
+    # Override other MutableMapping methods
+    def __contains__(self, key):
+        idx = self._key_to_id[key]
+        return self._fill_value is not None or idx in self._get_rows()
+
+    def __eq__(self, other):
+        if isinstance(other, NodeNodeMap):
+            return (
+                self._fill_value == other._fill_value
+                and self._values_are_keys == other._values_are_keys
+                and self.matrix.isequal(other.matrix)
+                and self._key_to_id == other._key_to_id
+            )
+        return super().__eq__(other)
+
+    def clear(self):
+        self.matrix.clear()
+        self._rows = None
+        self._fill_value = None
+
+    def get(self, key, default=None):
+        idx = self._key_to_id[key]
+        if self._fill_value is None and self._get_rows().get(idx) is None:
+            return default
+        return self.vector_to_nodemap(
+            self.matrix[idx, :].new(),
+            fill_value=self._fill_value,
+            values_are_keys=self._values_are_keys,
+        )
+
+    # items
+    # keys
+    # pop
+
+    def popitem(self):
+        rows = self._get_rows()
+        try:
+            idx = next(rows.ss.iterkeys())
+        except StopIteration:
+            raise KeyError from None
+        value = self.vector_to_nodemap(
+            self.matrix[idx, :].new(),
+            fill_value=self._fill_value,
+            values_are_keys=self._values_are_keys,
+        )
+        del self.matrix[idx, :]
+        del rows[idx]
+        return self.id_to_key[idx], value
+
+    # setdefault
+    # update
+    # values
diff --git a/graphblas_algorithms/interface.py b/graphblas_algorithms/interface.py
index ddc1091..e88cc4f 100644
--- a/graphblas_algorithms/interface.py
+++ b/graphblas_algorithms/interface.py
@@ -56,6 +56,9 @@ class Dispatcher:
     is_regular = nxapi.regular.is_regular
     # Shortest Paths
     floyd_warshall = nxapi.shortest_paths.dense.floyd_warshall
+    floyd_warshall_predecessor_and_distance = (
+        nxapi.shortest_paths.dense.floyd_warshall_predecessor_and_distance
+    )
     has_path = nxapi.shortest_paths.generic.has_path
     # Simple Paths
     is_simple_path = nxapi.simple_paths.is_simple_path
diff --git a/graphblas_algorithms/nxapi/centrality/degree_alg.py b/graphblas_algorithms/nxapi/centrality/degree_alg.py
index 5a43827..66776b0 100644
--- a/graphblas_algorithms/nxapi/centrality/degree_alg.py
+++ b/graphblas_algorithms/nxapi/centrality/degree_alg.py
@@ -8,18 +8,18 @@
 def degree_centrality(G):
     G = to_graph(G)
     result = algorithms.degree_centrality(G)
-    return G.vector_to_nodemap(result, fillvalue=0.0)
+    return G.vector_to_nodemap(result, fill_value=0.0)
 
 
 @not_implemented_for("undirected")
 def in_degree_centrality(G):
     G = to_directed_graph(G)
     result = algorithms.in_degree_centrality(G)
-    return G.vector_to_nodemap(result, fillvalue=0.0)
+    return G.vector_to_nodemap(result, fill_value=0.0)
 
 
 @not_implemented_for("undirected")
 def out_degree_centrality(G):
     G = to_directed_graph(G)
     result = algorithms.out_degree_centrality(G)
-    return G.vector_to_nodemap(result, fillvalue=0.0)
+    return G.vector_to_nodemap(result, fill_value=0.0)
diff --git a/graphblas_algorithms/nxapi/cluster.py b/graphblas_algorithms/nxapi/cluster.py
index 7ac5618..e252963 100644
--- a/graphblas_algorithms/nxapi/cluster.py
+++ b/graphblas_algorithms/nxapi/cluster.py
@@ -3,7 +3,6 @@
 from graphblas_algorithms import algorithms
 from graphblas_algorithms.classes.digraph import to_graph
 from graphblas_algorithms.classes.graph import to_undirected_graph
-from graphblas_algorithms.classes.nodemap import VectorNodeMap
 from graphblas_algorithms.utils import not_implemented_for
 
 __all__ = [
@@ -25,7 +24,7 @@ def triangles(G, nodes=None):
         return algorithms.single_triangle(G, nodes)
     mask = G.list_to_mask(nodes)
     result = algorithms.triangles(G, mask=mask)
-    return G.vector_to_nodemap(result, mask=mask, fillvalue=0)
+    return G.vector_to_nodemap(result, mask=mask, fill_value=0)
 
 
 def transitivity(G):
@@ -54,7 +53,7 @@ def clustering(G, nodes=None, weight=None):
         result = algorithms.clustering_directed(G, weighted=weighted, mask=mask)
     else:
         result = algorithms.clustering(G, weighted=weighted, mask=mask)
-    return G.vector_to_nodemap(result, mask=mask, fillvalue=0.0)
+    return G.vector_to_nodemap(result, mask=mask, fill_value=0.0)
 
 
 def average_clustering(G, nodes=None, weight=None, count_zeros=True):
@@ -116,7 +115,7 @@ def square_clustering(G, nodes=None, *, nsplits=None):
             result = algorithms.square_clustering(G)
         else:
             result = _square_clustering_split(G, nsplits=nsplits)
-        return G.vector_to_nodemap(result, fillvalue=0)
+        return G.vector_to_nodemap(result, fill_value=0)
     elif nodes in G:
         idx = G._key_to_id[nodes]
         return algorithms.single_square_clustering(G, idx)
@@ -139,6 +138,4 @@ def generalized_degree(G, nodes=None):
         return G.vector_to_nodemap(result)
     mask = G.list_to_mask(nodes)
     result = algorithms.generalized_degree(G, mask=mask)
-    rv = VectorNodeMap(result, key_to_id=G._key_to_id)
-    rv._id_to_key = G._id_to_key
-    return rv
+    return G.matrix_to_vectornodemap(result)
diff --git a/graphblas_algorithms/nxapi/link_analysis/hits_alg.py b/graphblas_algorithms/nxapi/link_analysis/hits_alg.py
index 553cd58..3686340 100644
--- a/graphblas_algorithms/nxapi/link_analysis/hits_alg.py
+++ b/graphblas_algorithms/nxapi/link_analysis/hits_alg.py
@@ -20,4 +20,4 @@ def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
         raise ArpackNoConvergence(*e.args, (), ()) from e
     # TODO: it would be nice if networkx raised their own exception, such as:
     # raise nx.PowerIterationFailedConvergence(*e.args) from e
-    return G.vector_to_nodemap(h, fillvalue=0), G.vector_to_nodemap(a, fillvalue=0)
+    return G.vector_to_nodemap(h, fill_value=0), G.vector_to_nodemap(a, fill_value=0)
diff --git a/graphblas_algorithms/nxapi/link_analysis/pagerank_alg.py b/graphblas_algorithms/nxapi/link_analysis/pagerank_alg.py
index 3d6e3c5..ee9b701 100644
--- a/graphblas_algorithms/nxapi/link_analysis/pagerank_alg.py
+++ b/graphblas_algorithms/nxapi/link_analysis/pagerank_alg.py
@@ -39,6 +39,6 @@ def pagerank(
             dangling=dangling_weights,
             row_degrees=row_degrees,
         )
-        return G.vector_to_nodemap(result, fillvalue=0.0)
+        return G.vector_to_nodemap(result, fill_value=0.0)
     except algorithms.exceptions.ConvergenceFailure as e:
         raise PowerIterationFailedConvergence(*e.args) from e
diff --git a/graphblas_algorithms/nxapi/shortest_paths/dense.py b/graphblas_algorithms/nxapi/shortest_paths/dense.py
index 21a32be..4b62891 100644
--- a/graphblas_algorithms/nxapi/shortest_paths/dense.py
+++ b/graphblas_algorithms/nxapi/shortest_paths/dense.py
@@ -1,10 +1,19 @@
 from graphblas_algorithms import algorithms
 from graphblas_algorithms.classes.digraph import to_graph
 
-__all__ = ["floyd_warshall"]
+__all__ = ["floyd_warshall", "floyd_warshall_predecessor_and_distance"]
 
 
 def floyd_warshall(G, weight="weight"):
     G = to_graph(G, weight=weight)
     D = algorithms.floyd_warshall(G, is_weighted=weight is not None)
-    return G.matrix_to_dicts(D)
+    return G.matrix_to_nodenodemap(D)
+
+
+def floyd_warshall_predecessor_and_distance(G, weight="weight"):
+    G = to_graph(G, weight=weight)
+    P, D = algorithms.floyd_warshall_predecessor_and_distance(G, is_weighted=weight is not None)
+    return (
+        G.matrix_to_nodenodemap(P, values_are_keys=True),
+        G.matrix_to_nodenodemap(D, fill_value=float("inf")),
+    )
diff --git a/requirements.txt b/requirements.txt
index 90acea8..1c408bf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1 @@
-python-graphblas >=2022.11.0
+python-graphblas >=2023.1.0
diff --git a/scripts/scipy_impl.py b/scripts/scipy_impl.py
index 06c9d81..adf7ee3 100644
--- a/scripts/scipy_impl.py
+++ b/scripts/scipy_impl.py
@@ -14,7 +14,6 @@ def pagerank(
     weight="weight",
     dangling=None,
 ):
-    N = A.shape[0]
     if A.nnz == 0:
         return {}
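
For context, a minimal usage sketch of the new `floyd_warshall_predecessor_and_distance` entry point added by this diff. The example graph, node labels, and the direct call through `graphblas_algorithms.nxapi` are illustrative assumptions (not part of the patch); expected outputs are noted in comments.

```python
# Sketch only: exercises the new nxapi function on a tiny made-up graph.
# Assumes networkx and a graphblas-algorithms build containing this diff,
# and that the nxapi layer accepts a NetworkX graph via `to_graph` as shown above.
import networkx as nx

from graphblas_algorithms import nxapi

G = nx.DiGraph()
G.add_weighted_edges_from([("a", "b", 1.0), ("b", "c", 2.0), ("a", "c", 5.0)])

P, D = nxapi.shortest_paths.dense.floyd_warshall_predecessor_and_distance(G)

# D is returned via matrix_to_nodenodemap(..., fill_value=float("inf")),
# so missing pairs read as inf, matching NetworkX's dict-of-dicts output.
print(D["a"]["c"])  # expected 3.0 (a -> b -> c beats the direct 5.0 edge)
print(D["c"]["a"])  # expected inf (no path in this directed graph)

# P is returned via matrix_to_nodenodemap(..., values_are_keys=True),
# so its values are node keys rather than integer ids.
print(P["a"]["c"])  # expected "b" (predecessor of c on the shortest a -> c path)
```

The `fill_value` and `values_are_keys` options on `NodeMap`/`NodeNodeMap` are what let these results behave like NetworkX's dict-of-dicts without densifying the underlying GraphBLAS matrices.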