From 973fb1c4eb10d8e5eb5eef388d9bc8797da5ea65 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sat, 11 Mar 2023 08:23:07 -0600 Subject: [PATCH 1/4] Add a few igraph algorithms to run via `scripts/bench.py` --- environment.yml | 9 ++++++ graphblas_algorithms/nxapi/_utils.py | 2 +- scripts/bench.py | 18 ++++++++--- scripts/igraph_impl.py | 46 ++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+), 5 deletions(-) create mode 100644 scripts/igraph_impl.py diff --git a/environment.yml b/environment.yml index 9342aa4..64dc97a 100644 --- a/environment.yml +++ b/environment.yml @@ -33,6 +33,9 @@ dependencies: - numba - python-suitesparse-graphblas - pyyaml + # python-graphblas extra dependencies + - fast_matrix_market + - packaging # networkx default dependencies - matplotlib - pandas @@ -54,3 +57,9 @@ dependencies: - ipython # For type annotations - mypy + # For benchmark comparisons (optional; uncomment as desired) + # - igraph + # - python-igraph + # - networkit + # - graph-tool + # - xorg-libxcursor # for graph-tool diff --git a/graphblas_algorithms/nxapi/_utils.py b/graphblas_algorithms/nxapi/_utils.py index db309a4..0bb9617 100644 --- a/graphblas_algorithms/nxapi/_utils.py +++ b/graphblas_algorithms/nxapi/_utils.py @@ -100,7 +100,7 @@ def partition(chunksize, L, *, evenly=True): yield from L return if evenly: - k = ceil(L / chunksize) + k = ceil(len(L) / chunksize) if k * chunksize != N: yield from split_evenly(k, L) return diff --git a/scripts/bench.py b/scripts/bench.py index ba61300..fa1bb4b 100755 --- a/scripts/bench.py +++ b/scripts/bench.py @@ -14,6 +14,7 @@ import scipy.sparse import graphblas_algorithms as ga +import igraph_impl import scipy_impl from graphblas_algorithms.interface import Dispatcher @@ -56,13 +57,20 @@ def readfile(filepath, is_symmetric, backend): return ga.Graph(A) return ga.DiGraph(A) a = scipy.io.mmread(filepath) - if backend == "networkx": + if backend in {"networkx", "igraph"}: create_using = nx.Graph if is_symmetric else nx.DiGraph - return nx.from_scipy_sparse_array(a, create_using=create_using) + G = nx.from_scipy_sparse_array(a, create_using=create_using) + if backend == "networkx": + return G + if backend == "igraph": + # TODO: is there a better way for igraph to read MM files or scipy.sparse arrays? + import igraph + + return igraph.Graph.from_networkx(G) if backend == "scipy": return scipy.sparse.csr_array(a) raise ValueError( - f"Backend {backend!r} not understood; must be 'graphblas', 'networkx', or 'scipy'" + f"Backend {backend!r} not understood; must be 'graphblas', 'networkx', 'igraph', or 'scipy'" ) @@ -126,6 +134,8 @@ def getfunction(functionname, backend): return getattr(Dispatcher, functionname) if backend == "scipy": return getattr(scipy_impl, functionname) + if backend == "igraph": + return getattr(igraph_impl, functionname) if functionname in functionpaths: func = nx for attr in functionpaths[functionname].split("."): @@ -222,7 +232,7 @@ def main( description=f"Example usage: python {sys.argv[0]} -b graphblas -f pagerank -d amazon0302" ) parser.add_argument( - "-b", "--backend", choices=["graphblas", "networkx", "scipy"], default="graphblas" + "-b", "--backend", choices=["graphblas", "networkx", "scipy", "igraph"], default="graphblas" ) parser.add_argument( "-t", "--time", type=float, default=3.0, help="Target minimum time to run benchmarks" diff --git a/scripts/igraph_impl.py b/scripts/igraph_impl.py new file mode 100644 index 0000000..5d91433 --- /dev/null +++ b/scripts/igraph_impl.py @@ -0,0 +1,46 @@ +def overall_reciprocity(G): + return G.reciprocity() + + +def pagerank( + G, + alpha=0.85, + personalization=None, + max_iter=100, + tol=1e-06, + nstart=None, + weight="weight", + dangling=None, + *, + vertices=None, + directed=True, + arpack_options=None, + implementation="prpack", +): + if personalization is not None: + raise NotImplementedError + if nstart is not None: + raise NotImplementedError + if dangling is not None: + raise NotImplementedError + rv = G.pagerank( + vertices=vertices, + directed=directed, + damping=alpha, + weights=weight, + arpack_options=arpack_options, + implementation=implementation, + ) + return rv + + +def transitivity(G): + return G.transitivity_undirected() + + +def average_clustering(G, nodes=None, weight=None, count_zeros=True): + if nodes is not None: + raise NotImplementedError + # TODO: check results when `count_zeros=False` + mode = "zero" if count_zeros else "nan" + return G.transitivity_avglocal_undirected(mode=mode, weights=weight) From ec81bd1fd6874d0b3ad10ba53188a324ddbe12ab Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sun, 12 Mar 2023 07:29:40 -0500 Subject: [PATCH 2/4] clustering in igraph --- scripts/igraph_impl.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/igraph_impl.py b/scripts/igraph_impl.py index 5d91433..ccd18a6 100644 --- a/scripts/igraph_impl.py +++ b/scripts/igraph_impl.py @@ -44,3 +44,8 @@ def average_clustering(G, nodes=None, weight=None, count_zeros=True): # TODO: check results when `count_zeros=False` mode = "zero" if count_zeros else "nan" return G.transitivity_avglocal_undirected(mode=mode, weights=weight) + + +def clustering(G, nodes=None, weight=None): + mode = "zero" # or "nan" + return G.transitivity_local_undirected(vertices=nodes, mode=mode, weights=weight) From 4bcc81714e3bbe548e411c97eafff20988486fd5 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 13 Mar 2023 08:54:34 -0700 Subject: [PATCH 3/4] Add `--min-n` command line option to `scripts/bench.py` --- scripts/bench.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/bench.py b/scripts/bench.py index fa1bb4b..97ee34d 100755 --- a/scripts/bench.py +++ b/scripts/bench.py @@ -159,7 +159,7 @@ def getgraph(dataname, backend="graphblas", functionname=None): def main( - dataname, backend, functionname, time=3.0, n=None, extra=None, display=True, enable_gc=False + dataname, backend, functionname, time=3.0, n=None, min_n=None, extra=None, display=True, enable_gc=False ): G = getgraph(dataname, backend, functionname) func = getfunction(functionname, backend) @@ -203,6 +203,8 @@ def main( n = 1 elif n is None: n = 2 ** max(0, int(np.ceil(np.log2(time / first_time)))) + if min_n is not None: + n = max(n, min_n) if display: print("Number of runs:", n) print("first: ", stime(first_time)) @@ -242,6 +244,11 @@ def main( type=int, help="The number of times to run the benchmark (the default is to run according to time)", ) + parser.add_argument( + "--min-n", + type=int, + help="The minimum number of times to run the benchmark", + ) parser.add_argument( "-d", "--data", @@ -270,6 +277,7 @@ def main( args.func, time=args.time, n=args.n, + min_n = args.min_n, extra=args.extra, display=not args.json, enable_gc=args.gc, From 8e58900e2c804e89eb12fc56d1a283d7df767b45 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 13 Mar 2023 15:13:41 -0500 Subject: [PATCH 4/4] oops run pre-commit --- scripts/bench.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/bench.py b/scripts/bench.py index 97ee34d..7bc3940 100755 --- a/scripts/bench.py +++ b/scripts/bench.py @@ -159,7 +159,15 @@ def getgraph(dataname, backend="graphblas", functionname=None): def main( - dataname, backend, functionname, time=3.0, n=None, min_n=None, extra=None, display=True, enable_gc=False + dataname, + backend, + functionname, + time=3.0, + n=None, + min_n=None, + extra=None, + display=True, + enable_gc=False, ): G = getgraph(dataname, backend, functionname) func = getfunction(functionname, backend) @@ -277,7 +285,7 @@ def main( args.func, time=args.time, n=args.n, - min_n = args.min_n, + min_n=args.min_n, extra=args.extra, display=not args.json, enable_gc=args.gc,