diff --git a/environment.yml b/environment.yml index 9342aa4..64dc97a 100644 --- a/environment.yml +++ b/environment.yml @@ -33,6 +33,9 @@ dependencies: - numba - python-suitesparse-graphblas - pyyaml + # python-graphblas extra dependencies + - fast_matrix_market + - packaging # networkx default dependencies - matplotlib - pandas @@ -54,3 +57,9 @@ dependencies: - ipython # For type annotations - mypy + # For benchmark comparisons (optional; uncomment as desired) + # - igraph + # - python-igraph + # - networkit + # - graph-tool + # - xorg-libxcursor # for graph-tool diff --git a/scripts/bench.py b/scripts/bench.py index ba61300..7bc3940 100755 --- a/scripts/bench.py +++ b/scripts/bench.py @@ -14,6 +14,7 @@ import scipy.sparse import graphblas_algorithms as ga +import igraph_impl import scipy_impl from graphblas_algorithms.interface import Dispatcher @@ -56,13 +57,20 @@ def readfile(filepath, is_symmetric, backend): return ga.Graph(A) return ga.DiGraph(A) a = scipy.io.mmread(filepath) - if backend == "networkx": + if backend in {"networkx", "igraph"}: create_using = nx.Graph if is_symmetric else nx.DiGraph - return nx.from_scipy_sparse_array(a, create_using=create_using) + G = nx.from_scipy_sparse_array(a, create_using=create_using) + if backend == "networkx": + return G + if backend == "igraph": + # TODO: is there a better way for igraph to read MM files or scipy.sparse arrays? 
+ import igraph + + return igraph.Graph.from_networkx(G) if backend == "scipy": return scipy.sparse.csr_array(a) raise ValueError( - f"Backend {backend!r} not understood; must be 'graphblas', 'networkx', or 'scipy'" + f"Backend {backend!r} not understood; must be 'graphblas', 'networkx', 'igraph', or 'scipy'" ) @@ -126,6 +134,8 @@ def getfunction(functionname, backend): return getattr(Dispatcher, functionname) if backend == "scipy": return getattr(scipy_impl, functionname) + if backend == "igraph": + return getattr(igraph_impl, functionname) if functionname in functionpaths: func = nx for attr in functionpaths[functionname].split("."): @@ -149,7 +159,15 @@ def getgraph(dataname, backend="graphblas", functionname=None): def main( - dataname, backend, functionname, time=3.0, n=None, extra=None, display=True, enable_gc=False + dataname, + backend, + functionname, + time=3.0, + n=None, + min_n=None, + extra=None, + display=True, + enable_gc=False, ): G = getgraph(dataname, backend, functionname) func = getfunction(functionname, backend) @@ -193,6 +211,8 @@ def main( n = 1 elif n is None: n = 2 ** max(0, int(np.ceil(np.log2(time / first_time)))) + if min_n is not None: + n = max(n, min_n) if display: print("Number of runs:", n) print("first: ", stime(first_time)) @@ -222,7 +242,7 @@ def main( description=f"Example usage: python {sys.argv[0]} -b graphblas -f pagerank -d amazon0302" ) parser.add_argument( - "-b", "--backend", choices=["graphblas", "networkx", "scipy"], default="graphblas" + "-b", "--backend", choices=["graphblas", "networkx", "scipy", "igraph"], default="graphblas" ) parser.add_argument( "-t", "--time", type=float, default=3.0, help="Target minimum time to run benchmarks" @@ -232,6 +252,11 @@ def main( type=int, help="The number of times to run the benchmark (the default is to run according to time)", ) + parser.add_argument( + "--min-n", + type=int, + help="The minimum number of times to run the benchmark", + ) parser.add_argument( "-d", "--data", @@ 
"""igraph-backed implementations of the benchmarked graph algorithms.

Every function takes an igraph graph object as ``G`` and keeps the parameter
names of the corresponding networkx function, so a caller can select an
implementation by function name alone.  Options that have no counterpart in
the igraph call raise ``NotImplementedError`` instead of being silently
ignored (the exceptions are documented per function).
"""


def overall_reciprocity(G):
    """Return the reciprocity of ``G`` as computed by ``G.reciprocity()``."""
    return G.reciprocity()


def pagerank(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1e-06,
    nstart=None,
    weight="weight",
    dangling=None,
    *,
    vertices=None,
    directed=True,
    arpack_options=None,
    implementation="prpack",
):
    """Return the PageRank scores of ``G`` computed by ``G.pagerank``.

    Parameters
    ----------
    G : igraph graph
        Graph to analyse.
    alpha : float
        Damping factor; forwarded as ``damping``.
    personalization, nstart, dangling
        Not supported; must be left as ``None``.
    max_iter, tol
        Accepted for signature compatibility only.  They are *not* forwarded
        to ``G.pagerank``.
    weight : str or None
        Forwarded as ``weights`` (an edge attribute name, or ``None``).
    vertices, directed, arpack_options, implementation
        Keyword-only options forwarded unchanged to ``G.pagerank``.

    Raises
    ------
    NotImplementedError
        If ``personalization``, ``nstart`` or ``dangling`` is not ``None``.
    """
    # Checked in the same order as the signature so the first offending
    # argument is the one reported.
    unsupported = {
        "personalization": personalization,
        "nstart": nstart,
        "dangling": dangling,
    }
    for name, value in unsupported.items():
        if value is not None:
            raise NotImplementedError(
                f"`{name}` is not supported by the igraph pagerank wrapper"
            )
    return G.pagerank(
        vertices=vertices,
        directed=directed,
        damping=alpha,
        weights=weight,
        arpack_options=arpack_options,
        implementation=implementation,
    )


def transitivity(G):
    """Return the global transitivity from ``G.transitivity_undirected()``."""
    return G.transitivity_undirected()


def average_clustering(G, nodes=None, weight=None, count_zeros=True):
    """Return the average local clustering coefficient of ``G``.

    Parameters
    ----------
    G : igraph graph
        Graph to analyse.
    nodes
        Not supported; must be ``None``.
    weight : str or None
        Forwarded to igraph as ``weights``.
    count_zeros : bool
        If true, every vertex contributes to the average and vertices without
        a defined coefficient count as zero.  If false, only vertices with a
        non-zero coefficient are averaged.

    Returns
    -------
    float
        The average coefficient, or NaN when ``count_zeros`` is false and no
        vertex has a non-zero coefficient.

    Raises
    ------
    NotImplementedError
        If ``nodes`` is not ``None``.
    """
    if nodes is not None:
        raise NotImplementedError(
            "`nodes` is not supported by the igraph average_clustering wrapper"
        )
    if count_zeros:
        return G.transitivity_avglocal_undirected(mode="zero", weights=weight)
    # igraph's mode="nan" only drops vertices of degree < 2, which is not the
    # same as "ignore zero coefficients": a vertex with two or more neighbours
    # and no triangles would still be averaged in as 0.  Filter explicitly.
    local = G.transitivity_local_undirected(mode="zero", weights=weight)
    nonzero = [coeff for coeff in local if abs(coeff) > 0]
    if not nonzero:
        # Nothing to average; NaN keeps a benchmark run alive where a plain
        # division would raise ZeroDivisionError.
        return float("nan")
    return sum(nonzero) / len(nonzero)


def clustering(G, nodes=None, weight=None):
    """Return the local clustering coefficients of ``G``.

    ``nodes`` is forwarded as ``vertices`` and ``weight`` as ``weights`` to
    ``G.transitivity_local_undirected``.  Vertices without a defined
    coefficient are reported as zero (``mode="zero"``).
    """
    return G.transitivity_local_undirected(
        vertices=nodes, mode="zero", weights=weight
    )