diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0ab1e27..4a65468 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -32,9 +32,10 @@ jobs: run: | conda install -c conda-forge joblib scipy pandas pytest-cov pytest-randomly # matplotlib lxml pygraphviz pydot sympy # Extra networkx deps we don't need yet - pip install git+https://github.com/networkx/networkx.git@main --no-deps - pip install -e . --no-deps + python -m pip install git+https://github.com/networkx/networkx.git@main + python -m pip install . echo "Done with installing" - name: PyTest run: | NETWORKX_GRAPH_CONVERT=parallel pytest --pyargs networkx + python -m pytest --pyargs nx_parallel diff --git a/.gitignore b/.gitignore index aa44ee2..b6e4761 100644 --- a/.gitignore +++ b/.gitignore @@ -127,4 +127,3 @@ dmypy.json # Pyre type checker .pyre/ - diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..33038c6 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,31 @@ +# Install pre-commit hooks via +# pre-commit install + +repos: + - repo: https://github.com/psf/black + rev: 23.3.0 + hooks: + - id: black + - repo: https://github.com/adamchainz/blacken-docs + rev: 1.13.0 + hooks: + - id: blacken-docs + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v2.7.1 + hooks: + - id: prettier + files: \.(html|md|toml|yml|yaml) + args: [--prose-wrap=preserve] + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.0.258 + hooks: + - id: ruff + args: + - --fix + - repo: local + hooks: + - id: pyproject.toml + name: pyproject.toml + language: system + entry: python tools/generate_pyproject.toml.py + files: "pyproject.toml|requirements/.*\\.txt|tools/.*pyproject.*" diff --git a/README.md b/README.md index cd5079b..cf7cd9a 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -NX-Parallel +nx_parallel ----------- -A NetworkX backend plugin which uses dask for parallelization. +A NetworkX backend plugin which uses joblib and multiprocessing for parallelization. ``` python In [1]: import networkx as nx; import nx_parallel @@ -23,4 +23,19 @@ Out[4]: 8: 0.0, 9: 0.0} -``` \ No newline at end of file +``` + +Currently the following functions have parallelized implementations: + - centrality + - betweenness_centrality + - tournament + - is_reachable + - closeness_vitality + - efficiency_measures + - local_efficiency + +![alt text](timing/heatmap_all_functions.png) + +See the `timing/` folder for more heatmaps and code for heatmap generation!
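Reviewer note: every parallel implementation in this PR follows the same chunk-and-reduce pattern: split the nodes into roughly one chunk per core, run a per-chunk kernel through joblib, then merge the partial results. Below is a minimal self-contained sketch of that pattern; the `chunks` helper mirrors `nx_parallel/algorithms/utils/chunk.py`, while `partial_kernel` is a hypothetical stand-in for the real per-chunk work (e.g. `betweenness_centrality_node_subset`).

```python
import itertools

from joblib import Parallel, cpu_count, delayed


def chunks(iterable, n):
    """Divide an iterable into tuples of size at most n."""
    it = iter(iterable)
    while True:
        x = tuple(itertools.islice(it, n))
        if not x:
            return
        yield x


def partial_kernel(chunk):
    # Hypothetical per-chunk work; the real kernels return e.g. a partial
    # betweenness dict for the chunk's source nodes.
    return sum(v * v for v in chunk)


nodes = list(range(1000))
chunk_size = max(len(nodes) // cpu_count(), 1)  # ~one chunk per core
partials = Parallel(n_jobs=cpu_count())(
    delayed(partial_kernel)(chunk) for chunk in chunks(nodes, chunk_size)
)
total = sum(partials)  # reduce step: merge the per-chunk partial results
```

One naming nit that recurs below: the variable `num_chunks` actually holds the chunk *size* (so the number of chunks comes out to about `cpu_count()`); a rename in a follow-up would help readability.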
+ + diff --git a/nx_parallel/__init__.py b/nx_parallel/__init__.py index f16b848..fefc51d 100644 --- a/nx_parallel/__init__.py +++ b/nx_parallel/__init__.py @@ -1,3 +1,3 @@ -from .centrality import * -from .graph import * +from .algorithms import * +from .classes import * from .interface import * diff --git a/nx_parallel/algorithms/__init__.py b/nx_parallel/algorithms/__init__.py new file mode 100644 index 0000000..d98e7db --- /dev/null +++ b/nx_parallel/algorithms/__init__.py @@ -0,0 +1,8 @@ +# subpackages +from .centrality import * +from .utils import * + +# modules +from .efficiency_measures import * +from .isolate import * +from .tournament import * diff --git a/nx_parallel/algorithms/centrality/__init__.py b/nx_parallel/algorithms/centrality/__init__.py new file mode 100644 index 0000000..cf7adb6 --- /dev/null +++ b/nx_parallel/algorithms/centrality/__init__.py @@ -0,0 +1 @@ +from .betweenness import * diff --git a/nx_parallel/algorithms/centrality/betweenness.py b/nx_parallel/algorithms/centrality/betweenness.py new file mode 100644 index 0000000..ea793ed --- /dev/null +++ b/nx_parallel/algorithms/centrality/betweenness.py @@ -0,0 +1,121 @@ +from joblib import Parallel, delayed, cpu_count +from nx_parallel.algorithms.utils.chunk import chunks +from networkx.utils import py_random_state +from networkx.algorithms.centrality.betweenness import ( + _rescale, + _single_source_shortest_path_basic, + _single_source_dijkstra_path_basic, + _accumulate_endpoints, + _accumulate_basic, +) + +__all__ = ["betweenness_centrality"] + + +@py_random_state(5) +def betweenness_centrality( + G, k=None, normalized=True, weight=None, endpoints=False, seed=None +): + r"""Parallel computation of shortest-path betweenness centrality for nodes. + + Betweenness centrality of a node $v$ is the sum of the + fraction of all-pairs shortest paths that pass through $v$ + + .. math:: + + c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)} + + where $V$ is the set of nodes, $\sigma(s, t)$ is the number of + shortest $(s, t)$-paths, and $\sigma(s, t|v)$ is the number of + those paths passing through some node $v$ other than $s, t$. + If $s = t$, $\sigma(s, t) = 1$, and if $v \in \{s, t\}$, + $\sigma(s, t|v) = 0$ [2]_. + + Parameters + ---------- + G : graph + A NetworkX graph. + + k : int, optional (default=None) + If k is not None use k node samples to estimate betweenness. + The value of k <= n where n is the number of nodes in the graph. + Higher values give better approximation. + + normalized : bool, optional + If True the betweenness values are normalized by `2/((n-1)(n-2))` + for graphs, and `1/((n-1)(n-2))` for directed graphs where `n` + is the number of nodes in G. + + weight : None or string, optional (default=None) + If None, all edge weights are considered equal. + Otherwise holds the name of the edge attribute used as weight. + Weights are used to calculate weighted shortest paths, so they are + interpreted as distances. + + endpoints : bool, optional + If True include the endpoints in the shortest path counts. + + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness`. + Note that this is only used if k is not None. + + Returns + ------- + nodes : dictionary + Dictionary of nodes with betweenness centrality as the value. + + Notes + ----- + This algorithm is a parallelized version of betweenness centrality in NetworkX. + Nodes are divided into chunks based on the number of available processors, + the contribution of each chunk is computed in parallel, and the partial + results are summed; otherwise all calculations are identical to the + sequential implementation.
+ """ + if k is None: + nodes = G.nodes + else: + nodes = seed.sample(list(G.nodes), k) + total_cores = cpu_count() + num_chunks = max(len(nodes) // total_cores, 1) + node_chunks = list(chunks(nodes, num_chunks)) + bt_cs = Parallel(n_jobs=total_cores)( + delayed(betweenness_centrality_node_subset)( + G, + chunk, + weight, + endpoints, + ) + for chunk in node_chunks + ) + + # Reducing partial solution + bt_c = bt_cs[0] + for bt in bt_cs[1:]: + for n in bt: + bt_c[n] += bt[n] + + betweenness = _rescale( + bt_c, + len(G), + normalized=normalized, + directed=G.is_directed(), + k=k, + endpoints=endpoints, + ) + return betweenness + + +def betweenness_centrality_node_subset(G, nodes, weight=None, endpoints=False): + betweenness = dict.fromkeys(G, 0.0) + for s in nodes: + # single source shortest paths + if weight is None: # use BFS + S, P, sigma, _ = _single_source_shortest_path_basic(G, s) + else: # use Dijkstra's algorithm + S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight) + # accumulation + if endpoints: + betweenness, delta = _accumulate_endpoints(betweenness, S, P, sigma, s) + else: + betweenness, delta = _accumulate_basic(betweenness, S, P, sigma, s) + return betweenness diff --git a/nx_parallel/algorithms/centrality/tests/test_betweenness.py b/nx_parallel/algorithms/centrality/tests/test_betweenness.py new file mode 100644 index 0000000..8601750 --- /dev/null +++ b/nx_parallel/algorithms/centrality/tests/test_betweenness.py @@ -0,0 +1,579 @@ +import pytest + +import networkx as nx; import nx_parallel + + +def weighted_G(): + G = nx.Graph() + G.add_edge(0, 1, weight=3) + G.add_edge(0, 2, weight=2) + G.add_edge(0, 3, weight=6) + G.add_edge(0, 4, weight=4) + G.add_edge(1, 3, weight=5) + G.add_edge(1, 5, weight=5) + G.add_edge(2, 4, weight=1) + G.add_edge(3, 4, weight=2) + G.add_edge(3, 5, weight=1) + G.add_edge(4, 5, weight=4) + return G + + +class TestBetweennessCentrality: + def test_K5(self): + """Betweenness centrality: K5""" + G = nx.complete_graph(5) + H = nx_parallel.ParallelGraph(G) + b = nx.betweenness_centrality(H, weight=None, normalized=False) + b_answer = {0: 0.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0} + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + + def test_K5_endpoints(self): + """Betweenness centrality: K5 endpoints""" + G = nx.complete_graph(5) + H = nx_parallel.ParallelGraph(G) + b = nx.betweenness_centrality(H, weight=None, normalized=False, endpoints=True) + b_answer = {0: 4.0, 1: 4.0, 2: 4.0, 3: 4.0, 4: 4.0} + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + # normalized = True case + b = nx.betweenness_centrality(H, weight=None, normalized=True, endpoints=True) + b_answer = {0: 0.4, 1: 0.4, 2: 0.4, 3: 0.4, 4: 0.4} + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + + def test_P3_normalized(self): + """Betweenness centrality: P3 normalized""" + G = nx.path_graph(3) + H = nx_parallel.ParallelDiGraph(G) + b = nx.betweenness_centrality(H, weight=None, normalized=True) + b_answer = {0: 0.0, 1: 1.0, 2: 0.0} + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + + def test_P3(self): + """Betweenness centrality: P3""" + G = nx.path_graph(3) + H = nx_parallel.ParallelGraph(G) + b_answer = {0: 0.0, 1: 1.0, 2: 0.0} + b = nx.betweenness_centrality(H, weight=None, normalized=False) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + + def test_sample_from_P3(self): + """Betweenness centrality: P3 sample""" + G = nx.path_graph(3) + H = 
nx_parallel.ParallelGraph(G) + b_answer = {0: 0.0, 1: 1.0, 2: 0.0} + b = nx.betweenness_centrality(H, k=3, weight=None, normalized=False, seed=1) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + b = nx.betweenness_centrality(H, k=2, weight=None, normalized=False, seed=1) + # python versions give different results with same seed + b_approx1 = {0: 0.0, 1: 1.5, 2: 0.0} + b_approx2 = {0: 0.0, 1: 0.75, 2: 0.0} + for n in sorted(G): + assert b[n] in (b_approx1[n], b_approx2[n]) + + def test_P3_endpoints(self): + """Betweenness centrality: P3 endpoints""" + G = nx.path_graph(3) + H = nx_parallel.ParallelGraph(G) + b_answer = {0: 2.0, 1: 3.0, 2: 2.0} + b = nx.betweenness_centrality(H, weight=None, normalized=False, endpoints=True) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + # normalized = True case + b_answer = {0: 2 / 3, 1: 1.0, 2: 2 / 3} + b = nx.betweenness_centrality(H, weight=None, normalized=True, endpoints=True) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + + def test_krackhardt_kite_graph(self): + """Betweenness centrality: Krackhardt kite graph""" + G = nx.krackhardt_kite_graph() + H = nx_parallel.ParallelGraph(G) + b_answer = { + 0: 1.667, + 1: 1.667, + 2: 0.000, + 3: 7.333, + 4: 0.000, + 5: 16.667, + 6: 16.667, + 7: 28.000, + 8: 16.000, + 9: 0.000, + } + for b in b_answer: + b_answer[b] /= 2 + b = nx.betweenness_centrality(H, weight=None, normalized=False) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-3) + + def test_krackhardt_kite_graph_normalized(self): + """Betweenness centrality: Krackhardt kite graph normalized""" + G = nx.krackhardt_kite_graph() + H = nx_parallel.ParallelGraph(G) + b_answer = { + 0: 0.023, + 1: 0.023, + 2: 0.000, + 3: 0.102, + 4: 0.000, + 5: 0.231, + 6: 0.231, + 7: 0.389, + 8: 0.222, + 9: 0.000, + } + b = nx.betweenness_centrality(H, weight=None, normalized=True) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-3) + + def test_florentine_families_graph(self): + """Betweenness centrality: Florentine families graph""" + G = nx.florentine_families_graph() + H = nx_parallel.ParallelGraph(G) + b_answer = { + "Acciaiuoli": 0.000, + "Albizzi": 0.212, + "Barbadori": 0.093, + "Bischeri": 0.104, + "Castellani": 0.055, + "Ginori": 0.000, + "Guadagni": 0.255, + "Lamberteschi": 0.000, + "Medici": 0.522, + "Pazzi": 0.000, + "Peruzzi": 0.022, + "Ridolfi": 0.114, + "Salviati": 0.143, + "Strozzi": 0.103, + "Tornabuoni": 0.092, + } + + b = nx.betweenness_centrality(H, weight=None, normalized=True) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-3) + + def test_les_miserables_graph(self): + """Betweenness centrality: Les Miserables graph""" + G = nx.les_miserables_graph() + H = nx_parallel.ParallelGraph(G) + b_answer = { + "Napoleon": 0.000, + "Myriel": 0.177, + "MlleBaptistine": 0.000, + "MmeMagloire": 0.000, + "CountessDeLo": 0.000, + "Geborand": 0.000, + "Champtercier": 0.000, + "Cravatte": 0.000, + "Count": 0.000, + "OldMan": 0.000, + "Valjean": 0.570, + "Labarre": 0.000, + "Marguerite": 0.000, + "MmeDeR": 0.000, + "Isabeau": 0.000, + "Gervais": 0.000, + "Listolier": 0.000, + "Tholomyes": 0.041, + "Fameuil": 0.000, + "Blacheville": 0.000, + "Favourite": 0.000, + "Dahlia": 0.000, + "Zephine": 0.000, + "Fantine": 0.130, + "MmeThenardier": 0.029, + "Thenardier": 0.075, + "Cosette": 0.024, + "Javert": 0.054, + "Fauchelevent": 0.026, + "Bamatabois": 0.008, + "Perpetue": 0.000, + "Simplice": 0.009, + "Scaufflaire": 
0.000, + "Woman1": 0.000, + "Judge": 0.000, + "Champmathieu": 0.000, + "Brevet": 0.000, + "Chenildieu": 0.000, + "Cochepaille": 0.000, + "Pontmercy": 0.007, + "Boulatruelle": 0.000, + "Eponine": 0.011, + "Anzelma": 0.000, + "Woman2": 0.000, + "MotherInnocent": 0.000, + "Gribier": 0.000, + "MmeBurgon": 0.026, + "Jondrette": 0.000, + "Gavroche": 0.165, + "Gillenormand": 0.020, + "Magnon": 0.000, + "MlleGillenormand": 0.048, + "MmePontmercy": 0.000, + "MlleVaubois": 0.000, + "LtGillenormand": 0.000, + "Marius": 0.132, + "BaronessT": 0.000, + "Mabeuf": 0.028, + "Enjolras": 0.043, + "Combeferre": 0.001, + "Prouvaire": 0.000, + "Feuilly": 0.001, + "Courfeyrac": 0.005, + "Bahorel": 0.002, + "Bossuet": 0.031, + "Joly": 0.002, + "Grantaire": 0.000, + "MotherPlutarch": 0.000, + "Gueulemer": 0.005, + "Babet": 0.005, + "Claquesous": 0.005, + "Montparnasse": 0.004, + "Toussaint": 0.000, + "Child1": 0.000, + "Child2": 0.000, + "Brujon": 0.000, + "MmeHucheloup": 0.000, + } + + b = nx.betweenness_centrality(H, weight=None, normalized=True) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-3) + + def test_ladder_graph(self): + """Betweenness centrality: Ladder graph""" + G = nx.Graph() # ladder_graph(3) + G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 3), (2, 4), (4, 5), (3, 5)]) + H = nx_parallel.ParallelGraph(G) + b_answer = {0: 1.667, 1: 1.667, 2: 6.667, 3: 6.667, 4: 1.667, 5: 1.667} + for b in b_answer: + b_answer[b] /= 2 + b = nx.betweenness_centrality(H, weight=None, normalized=False) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-3) + + def test_disconnected_path(self): + """Betweenness centrality: disconnected path""" + G = nx.Graph() + nx.add_path(G, [0, 1, 2]) + nx.add_path(G, [3, 4, 5, 6]) + H = nx_parallel.ParallelGraph(G) + b_answer = {0: 0, 1: 1, 2: 0, 3: 0, 4: 2, 5: 2, 6: 0} + b = nx.betweenness_centrality(H, weight=None, normalized=False) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + + def test_disconnected_path_endpoints(self): + """Betweenness centrality: disconnected path endpoints""" + G = nx.Graph() + nx.add_path(G, [0, 1, 2]) + nx.add_path(G, [3, 4, 5, 6]) + H = nx_parallel.ParallelGraph(G) + b_answer = {0: 2, 1: 3, 2: 2, 3: 3, 4: 5, 5: 5, 6: 3} + b = nx.betweenness_centrality(H, weight=None, normalized=False, endpoints=True) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + # normalized = True case + b = nx.betweenness_centrality(H, weight=None, normalized=True, endpoints=True) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n] / 21, abs=1e-7) + + def test_directed_path(self): + """Betweenness centrality: directed path""" + G = nx.DiGraph() + nx.add_path(G, [0, 1, 2]) + H = nx_parallel.ParallelDiGraph(G) + b = nx.betweenness_centrality(H, weight=None, normalized=False) + b_answer = {0: 0.0, 1: 1.0, 2: 0.0} + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + + def test_directed_path_normalized(self): + """Betweenness centrality: directed path normalized""" + G = nx.DiGraph() + nx.add_path(G, [0, 1, 2]) + H = nx_parallel.ParallelDiGraph(G) + b = nx.betweenness_centrality(H, weight=None, normalized=True) + b_answer = {0: 0.0, 1: 0.5, 2: 0.0} + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + + +class TestWeightedBetweennessCentrality: + def test_K5(self): + """Weighted betweenness centrality: K5""" + G = nx.complete_graph(5) + H = nx_parallel.ParallelGraph(G) + b = nx.betweenness_centrality(H, weight="weight", 
normalized=False) + b_answer = {0: 0.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0} + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + + def test_P3_normalized(self): + """Weighted betweenness centrality: P3 normalized""" + G = nx.path_graph(3) + H = nx_parallel.ParallelDiGraph(G) + b = nx.betweenness_centrality(H, weight="weight", normalized=True) + b_answer = {0: 0.0, 1: 1.0, 2: 0.0} + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + + def test_P3(self): + """Weighted betweenness centrality: P3""" + G = nx.path_graph(3) + H = nx_parallel.ParallelGraph(G) + b_answer = {0: 0.0, 1: 1.0, 2: 0.0} + b = nx.betweenness_centrality(H, weight="weight", normalized=False) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + + def test_krackhardt_kite_graph(self): + """Weighted betweenness centrality: Krackhardt kite graph""" + G = nx.krackhardt_kite_graph() + H = nx_parallel.ParallelGraph(G) + b_answer = { + 0: 1.667, + 1: 1.667, + 2: 0.000, + 3: 7.333, + 4: 0.000, + 5: 16.667, + 6: 16.667, + 7: 28.000, + 8: 16.000, + 9: 0.000, + } + for b in b_answer: + b_answer[b] /= 2 + + b = nx.betweenness_centrality(H, weight="weight", normalized=False) + + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-3) + + def test_krackhardt_kite_graph_normalized(self): + """Weighted betweenness centrality: + Krackhardt kite graph normalized + """ + G = nx.krackhardt_kite_graph() + H = nx_parallel.ParallelGraph(G) + b_answer = { + 0: 0.023, + 1: 0.023, + 2: 0.000, + 3: 0.102, + 4: 0.000, + 5: 0.231, + 6: 0.231, + 7: 0.389, + 8: 0.222, + 9: 0.000, + } + b = nx.betweenness_centrality(H, weight="weight", normalized=True) + + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-3) + + def test_florentine_families_graph(self): + """Weighted betweenness centrality: + Florentine families graph""" + G = nx.florentine_families_graph() + H = nx_parallel.ParallelGraph(G) + b_answer = { + "Acciaiuoli": 0.000, + "Albizzi": 0.212, + "Barbadori": 0.093, + "Bischeri": 0.104, + "Castellani": 0.055, + "Ginori": 0.000, + "Guadagni": 0.255, + "Lamberteschi": 0.000, + "Medici": 0.522, + "Pazzi": 0.000, + "Peruzzi": 0.022, + "Ridolfi": 0.114, + "Salviati": 0.143, + "Strozzi": 0.103, + "Tornabuoni": 0.092, + } + + b = nx.betweenness_centrality(H, weight="weight", normalized=True) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-3) + + def test_les_miserables_graph(self): + """Weighted betweenness centrality: Les Miserables graph""" + G = nx.les_miserables_graph() + H = nx_parallel.ParallelDiGraph(G) + b_answer = { + "Napoleon": 0.000, + "Myriel": 0.177, + "MlleBaptistine": 0.000, + "MmeMagloire": 0.000, + "CountessDeLo": 0.000, + "Geborand": 0.000, + "Champtercier": 0.000, + "Cravatte": 0.000, + "Count": 0.000, + "OldMan": 0.000, + "Valjean": 0.454, + "Labarre": 0.000, + "Marguerite": 0.009, + "MmeDeR": 0.000, + "Isabeau": 0.000, + "Gervais": 0.000, + "Listolier": 0.000, + "Tholomyes": 0.066, + "Fameuil": 0.000, + "Blacheville": 0.000, + "Favourite": 0.000, + "Dahlia": 0.000, + "Zephine": 0.000, + "Fantine": 0.114, + "MmeThenardier": 0.046, + "Thenardier": 0.129, + "Cosette": 0.075, + "Javert": 0.193, + "Fauchelevent": 0.026, + "Bamatabois": 0.080, + "Perpetue": 0.000, + "Simplice": 0.001, + "Scaufflaire": 0.000, + "Woman1": 0.000, + "Judge": 0.000, + "Champmathieu": 0.000, + "Brevet": 0.000, + "Chenildieu": 0.000, + "Cochepaille": 0.000, + "Pontmercy": 0.023, + "Boulatruelle": 0.000, + "Eponine": 0.023, + 
"Anzelma": 0.000, + "Woman2": 0.000, + "MotherInnocent": 0.000, + "Gribier": 0.000, + "MmeBurgon": 0.026, + "Jondrette": 0.000, + "Gavroche": 0.285, + "Gillenormand": 0.024, + "Magnon": 0.005, + "MlleGillenormand": 0.036, + "MmePontmercy": 0.005, + "MlleVaubois": 0.000, + "LtGillenormand": 0.015, + "Marius": 0.072, + "BaronessT": 0.004, + "Mabeuf": 0.089, + "Enjolras": 0.003, + "Combeferre": 0.000, + "Prouvaire": 0.000, + "Feuilly": 0.004, + "Courfeyrac": 0.001, + "Bahorel": 0.007, + "Bossuet": 0.028, + "Joly": 0.000, + "Grantaire": 0.036, + "MotherPlutarch": 0.000, + "Gueulemer": 0.025, + "Babet": 0.015, + "Claquesous": 0.042, + "Montparnasse": 0.050, + "Toussaint": 0.011, + "Child1": 0.000, + "Child2": 0.000, + "Brujon": 0.002, + "MmeHucheloup": 0.034, + } + + b = nx.betweenness_centrality(H, weight="weight", normalized=True) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-3) + + def test_ladder_graph(self): + """Weighted betweenness centrality: Ladder graph""" + G = nx.Graph() # ladder_graph(3) + G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 3), (2, 4), (4, 5), (3, 5)]) + H = nx_parallel.ParallelGraph(G) + b_answer = {0: 1.667, 1: 1.667, 2: 6.667, 3: 6.667, 4: 1.667, 5: 1.667} + for b in b_answer: + b_answer[b] /= 2 + b = nx.betweenness_centrality(H, weight="weight", normalized=False) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-3) + + def test_G(self): + """Weighted betweenness centrality: G""" + G = weighted_G() + H = nx_parallel.ParallelGraph(G) + b_answer = {0: 2.0, 1: 0.0, 2: 4.0, 3: 3.0, 4: 4.0, 5: 0.0} + b = nx.betweenness_centrality(H, weight="weight", normalized=False) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + + def test_G2(self): + """Weighted betweenness centrality: G2""" + G = nx.DiGraph() + G.add_weighted_edges_from( + [ + ("s", "u", 10), + ("s", "x", 5), + ("u", "v", 1), + ("u", "x", 2), + ("v", "y", 1), + ("x", "u", 3), + ("x", "v", 5), + ("x", "y", 2), + ("y", "s", 7), + ("y", "v", 6), + ] + ) + H = nx_parallel.ParallelDiGraph(G) + b_answer = {"y": 5.0, "x": 5.0, "s": 4.0, "u": 2.0, "v": 2.0} + + b = nx.betweenness_centrality(H, weight="weight", normalized=False) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + + def test_G3(self): + """Weighted betweenness centrality: G3""" + G = nx.MultiGraph(weighted_G()) + es = list(G.edges(data=True))[::2] # duplicate every other edge + G.add_edges_from(es) + H = nx_parallel.ParallelMultiGraph(G) + b_answer = {0: 2.0, 1: 0.0, 2: 4.0, 3: 3.0, 4: 4.0, 5: 0.0} + b = nx.betweenness_centrality(G, weight="weight", normalized=False) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) + + def test_G4(self): + """Weighted betweenness centrality: G4""" + G = nx.MultiDiGraph() + G.add_weighted_edges_from( + [ + ("s", "u", 10), + ("s", "x", 5), + ("s", "x", 6), + ("u", "v", 1), + ("u", "x", 2), + ("v", "y", 1), + ("v", "y", 1), + ("x", "u", 3), + ("x", "v", 5), + ("x", "y", 2), + ("x", "y", 3), + ("y", "s", 7), + ("y", "v", 6), + ("y", "v", 6), + ] + ) + + b_answer = {"y": 5.0, "x": 5.0, "s": 4.0, "u": 2.0, "v": 2.0} + H = nx_parallel.ParallelMultiDiGraph(G) + b = nx.betweenness_centrality(H, weight="weight", normalized=False) + for n in sorted(G): + assert b[n] == pytest.approx(b_answer[n], abs=1e-7) diff --git a/nx_parallel/algorithms/efficiency_measures.py b/nx_parallel/algorithms/efficiency_measures.py new file mode 100644 index 0000000..568a068 --- /dev/null +++ 
b/nx_parallel/algorithms/efficiency_measures.py @@ -0,0 +1,90 @@ +"""Provides functions for computing the efficiency of nodes and graphs.""" +from joblib import Parallel, cpu_count, delayed +from nx_parallel.algorithms.utils.chunk import chunks +from nx_parallel.classes.graph import ( + ParallelGraph, + ParallelDiGraph, + ParallelMultiDiGraph, + ParallelMultiGraph, +) +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = ["local_efficiency"] + +"""Helper to interface between graph types""" + + +def _convert(G): + if isinstance(G, ParallelMultiDiGraph): + return ParallelMultiDiGraph.to_networkx(G) + if isinstance(G, ParallelMultiGraph): + return ParallelMultiGraph.to_networkx(G) + if isinstance(G, ParallelDiGraph): + return ParallelDiGraph.to_networkx(G) + if isinstance(G, ParallelGraph): + return ParallelGraph.to_networkx(G) + + +@not_implemented_for("directed") +def efficiency(G, u, v): + return nx.efficiency(_convert(G), u, v) + + +@not_implemented_for("directed") +def global_efficiency(G): + return nx.global_efficiency(_convert(G)) + + +@not_implemented_for("directed") +def local_efficiency(G): + """Returns the average local efficiency of the graph. + + The *efficiency* of a pair of nodes in a graph is the multiplicative + inverse of the shortest path distance between the nodes. The *local + efficiency* of a node in the graph is the average global efficiency of the + subgraph induced by the neighbors of the node. The *average local + efficiency* is the average of the local efficiencies of each node [1]_. + + Parameters + ---------- + G : :class:`networkx.Graph` + An undirected graph for which to compute the average local efficiency. + + Returns + ------- + float + The average local efficiency of the graph. + + Examples + -------- + >>> G = nx.Graph([(0, 1), (0, 2), (0, 3), (1, 2), (1, 3)]) + >>> nx.local_efficiency(G) + 0.9166666666666667 + + Notes + ----- + Edge weights are ignored when computing the shortest path distances. + + See also + -------- + global_efficiency + + References + ---------- + .. [1] Latora, Vito, and Massimo Marchiori. + "Efficient behavior of small-world networks." + *Physical Review Letters* 87.19 (2001): 198701. + + """ + total_cores = cpu_count() + num_chunks = max(len(G.nodes) // total_cores, 1) + node_chunks = list(chunks(G.nodes, num_chunks)) + efficiencies = Parallel(n_jobs=total_cores)( + delayed(local_efficiency_node_subset)(G, chunk) for chunk in node_chunks + ) + return sum(efficiencies) / len(G) + + +def local_efficiency_node_subset(G, nodes): + return sum(global_efficiency(G.subgraph(G[v])) for v in nodes) diff --git a/nx_parallel/algorithms/isolate.py b/nx_parallel/algorithms/isolate.py new file mode 100644 index 0000000..2e745d8 --- /dev/null +++ b/nx_parallel/algorithms/isolate.py @@ -0,0 +1,42 @@ +from joblib import Parallel, cpu_count, delayed +import networkx as nx +from nx_parallel.algorithms.utils.chunk import chunks + +__all__ = ["number_of_isolates"] + +"""Identical to networkx implementation""" + + +def is_isolate(G, n): + return nx.is_isolate(G.originalGraph, n) + + +"""Identical to networkx implementation""" + + +def isolates(G): + return nx.isolates(G.originalGraph) + + +def number_of_isolates(G): + """Returns the number of isolates in the graph. Parallel implementation. + + An *isolate* is a node with no neighbors (that is, with degree + zero). For directed graphs, this means no in-neighbors and no + out-neighbors. 
+ + Parameters + ---------- + G : NetworkX graph + + Returns + ------- + int + The number of degree zero nodes in the graph `G`. + + """ + isolates_list = list(isolates(G)) + num_chunks = max(len(isolates_list) // cpu_count(), 1) + isolate_chunks = chunks(isolates_list, num_chunks) + results = Parallel(n_jobs=-1)(delayed(len)(chunk) for chunk in isolate_chunks) + return sum(results) diff --git a/nx_parallel/algorithms/tests/test_efficiency.py b/nx_parallel/algorithms/tests/test_efficiency.py new file mode 100644 index 0000000..1316b01 --- /dev/null +++ b/nx_parallel/algorithms/tests/test_efficiency.py @@ -0,0 +1,64 @@ +"""Unit tests for the :mod:`networkx.algorithms.efficiency` module.""" + +import networkx as nx; import nx_parallel + + +class TestEfficiency: + def setup_method(self): + # G1 is a disconnected graph + self.G1 = nx.Graph() + self.G1.add_nodes_from([1, 2, 3]) + self.H1 = nx_parallel.ParallelGraph(self.G1) + # G2 is a cycle graph + self.G2 = nx.cycle_graph(4) + self.H2 = nx_parallel.ParallelGraph(self.G2) + # G3 is the triangle graph with one additional edge + self.G3 = nx.lollipop_graph(3, 1) + self.H3 = nx_parallel.ParallelGraph(self.G3) + + + def test_efficiency_disconnected_nodes(self): + """ + When nodes are disconnected, efficiency is 0 + """ + assert nx.efficiency(self.H1, 1, 2) == 0 + + def test_local_efficiency_disconnected_graph(self): + """ + In a disconnected graph the efficiency is 0 + """ + assert nx.local_efficiency(self.H1) == 0 + + def test_efficiency(self): + assert nx.efficiency(self.H2, 0, 1) == 1 + assert nx.efficiency(self.H2, 0, 2) == 1 / 2 + + def test_global_efficiency(self): + assert nx.global_efficiency(self.H2) == 5 / 6 + + def test_global_efficiency_complete_graph(self): + """ + Tests that the average global efficiency of the complete graph is one. + """ + for n in range(2, 10): + G = nx.complete_graph(n) + H = nx_parallel.ParallelGraph(G) + assert nx.global_efficiency(H) == 1 + + def test_local_efficiency_complete_graph(self): + """ + Test that the local efficiency for a complete graph with at least 3 + nodes should be one. For a graph with only 2 nodes, the induced + subgraph has no edges. + """ + for n in range(3, 10): + G = nx.complete_graph(n) + H = nx_parallel.ParallelGraph(G) + assert nx.local_efficiency(H) == 1 + + def test_using_ego_graph(self): + """ + Test that the ego graph is used when computing local efficiency. + For more information, see GitHub issue #2710. + """ + assert nx.local_efficiency(self.H3) == 7 / 12 \ No newline at end of file diff --git a/nx_parallel/algorithms/tests/test_isolate.py b/nx_parallel/algorithms/tests/test_isolate.py new file mode 100644 index 0000000..35d4c7c --- /dev/null +++ b/nx_parallel/algorithms/tests/test_isolate.py @@ -0,0 +1,29 @@ +"""Unit tests for the :mod:`networkx.algorithms.isolates` module. 
Modified for nx_parallel backend""" + +import networkx as nx; import nx_parallel + + +def test_is_isolate(): + G = nx.Graph() + G.add_edge(0, 1) + G.add_node(2) + H = nx_parallel.ParallelGraph(G) + assert not nx.is_isolate(H, 0) + assert not nx.is_isolate(H, 1) + assert nx.is_isolate(H, 2) + + +def test_isolates(): + G = nx.Graph() + G.add_edge(0, 1) + G.add_nodes_from([2, 3]) + H = nx_parallel.ParallelGraph(G) + assert sorted(nx.isolates(H)) == [2, 3] + + +def test_number_of_isolates(): + G = nx.Graph() + G.add_edge(0, 1) + G.add_nodes_from([2, 3]) + H = nx_parallel.ParallelGraph(G) + assert nx.number_of_isolates(H) == 2 diff --git a/nx_parallel/algorithms/tests/test_tournament.py b/nx_parallel/algorithms/tests/test_tournament.py new file mode 100644 index 0000000..51d06e5 --- /dev/null +++ b/nx_parallel/algorithms/tests/test_tournament.py @@ -0,0 +1,181 @@ +"""Unit tests for the :mod:`networkx.algorithms.tournament` module. Modified for nx_parallel backend""" +from itertools import combinations + +import pytest + +from networkx import DiGraph; import nx_parallel + +from networkx.algorithms.tournament import ( + hamiltonian_path, + index_satisfying, + is_reachable, + is_strongly_connected, + is_tournament, + random_tournament, + score_sequence, + tournament_matrix, +) + + +def test_condition_not_satisfied(): + condition = lambda x: x > 0 + iter_in = [0] + assert index_satisfying(iter_in, condition) == 1 + + +def test_empty_iterable(): + condition = lambda x: x > 0 + with pytest.raises(ValueError): + index_satisfying([], condition) + + +def test_is_tournament(): + G = DiGraph() + G.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 0), (1, 3), (0, 2)]) + H = nx_parallel.ParallelDiGraph(G) + assert is_tournament(H) + + +def test_self_loops(): + """A tournament must have no self-loops.""" + G = DiGraph() + G.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 0), (1, 3), (0, 2)]) + G.add_edge(0, 0) + H = nx_parallel.ParallelDiGraph(G) + assert not is_tournament(H) + + +def test_missing_edges(): + """A tournament must not have any pair of nodes without at least + one edge joining the pair. + + """ + G = DiGraph() + G.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 0), (1, 3)]) + H = nx_parallel.ParallelDiGraph(G) + assert not is_tournament(H) + + +def test_bidirectional_edges(): + """A tournament must not have any pair of nodes with greater + than one edge joining the pair. 
+ + """ + G = DiGraph() + G.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 0), (1, 3), (0, 2)]) + G.add_edge(1, 0) + H = nx_parallel.ParallelDiGraph(G) + assert not is_tournament(H) + + +def test_graph_is_tournament(): + for _ in range(10): + G = random_tournament(5) + H = nx_parallel.ParallelDiGraph(G) + assert is_tournament(H) + + +def test_graph_is_tournament_seed(): + for _ in range(10): + G = random_tournament(5, seed=1) + H = nx_parallel.ParallelDiGraph(G) + assert is_tournament(H) + + +def test_graph_is_tournament_one_node(): + G = random_tournament(1) + H = nx_parallel.ParallelDiGraph(G) + assert is_tournament(H) + + +def test_graph_is_tournament_zero_node(): + G = random_tournament(0) + H = nx_parallel.ParallelDiGraph(G) + assert is_tournament(H) + + +def test_hamiltonian_empty_graph(): + path = hamiltonian_path(nx_parallel.ParallelDiGraph()) + assert len(path) == 0 + + +def test_path_is_hamiltonian(): + G = DiGraph() + G.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 0), (1, 3), (0, 2)]) + H = nx_parallel.ParallelDiGraph(G) + path = hamiltonian_path(H) + assert len(path) == 4 + assert all(v in H[u] for u, v in zip(path, path[1:])) + + +def test_hamiltonian_cycle(): + """Tests that :func:`networkx.tournament.hamiltonian_path` + returns a Hamiltonian cycle when provided a strongly connected + tournament. + + """ + G = DiGraph() + G.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 0), (1, 3), (0, 2)]) + H = nx_parallel.ParallelDiGraph(G) + path = hamiltonian_path(H) + assert len(path) == 4 + assert all(v in G[u] for u, v in zip(path, path[1:])) + assert path[0] in H[path[-1]] + + +def test_score_sequence_edge(): + G = DiGraph([(0, 1)]) + H = nx_parallel.ParallelDiGraph(G) + assert score_sequence(H) == [0, 1] + + +def test_score_sequence_triangle(): + G = DiGraph([(0, 1), (1, 2), (2, 0)]) + H = nx_parallel.ParallelDiGraph(G) + assert score_sequence(H) == [1, 1, 1] + + +def test_tournament_matrix(): + np = pytest.importorskip("numpy") + pytest.importorskip("scipy") + npt = np.testing + G = DiGraph([(0, 1)]) + H = nx_parallel.ParallelDiGraph(G) + m = tournament_matrix(H) + npt.assert_array_equal(m.todense(), np.array([[0, 1], [-1, 0]])) + + +def test_reachable_pair(): + """Tests for a reachable pair of nodes.""" + G = DiGraph([(0, 1), (1, 2), (2, 0)]) + H = nx_parallel.ParallelDiGraph(G) + assert is_reachable(H, 0, 2) + + +def test_same_node_is_reachable(): + """Tests that a node is always reachable from it.""" + # G is an arbitrary tournament on ten nodes. 
+ G = DiGraph(sorted(p) for p in combinations(range(10), 2)) + H = nx_parallel.ParallelDiGraph(G) + assert all(is_reachable(H, v, v) for v in H) + + +def test_unreachable_pair(): + """Tests for an unreachable pair of nodes.""" + G = DiGraph([(0, 1), (0, 2), (1, 2)]) + H = nx_parallel.ParallelDiGraph(G) + assert not is_reachable(H, 1, 0) + + +def test_is_strongly_connected(): + """Tests for a strongly connected tournament.""" + G = DiGraph([(0, 1), (1, 2), (2, 0)]) + H = nx_parallel.ParallelDiGraph(G) + assert is_strongly_connected(H) + + +def test_not_strongly_connected(): + """Tests for a tournament that is not strongly connected.""" + G = DiGraph([(0, 1), (0, 2), (1, 2)]) + H = nx_parallel.ParallelDiGraph(G) + assert not is_strongly_connected(H) diff --git a/nx_parallel/algorithms/tests/test_vitality.py b/nx_parallel/algorithms/tests/test_vitality.py new file mode 100644 index 0000000..dd68e98 --- /dev/null +++ b/nx_parallel/algorithms/tests/test_vitality.py @@ -0,0 +1,50 @@ +"""Modified unit tests for backend from :mod:`networkx.algorithms.vitality`""" + +import networkx as nx +import nx_parallel + + +class TestClosenessVitality: + def test_unweighted(self): + G = nx.cycle_graph(3) + H = nx_parallel.ParallelGraph(G) + vitality = nx.closeness_vitality(H) + assert vitality == {0: 2, 1: 2, 2: 2} + + def test_weighted(self): + G = nx.Graph() + nx.add_cycle(G, [0, 1, 2], weight=2) + H = nx_parallel.ParallelGraph(G) + vitality = nx.closeness_vitality(H, weight="weight") + assert vitality == {0: 4, 1: 4, 2: 4} + + def test_unweighted_digraph(self): + G = nx.DiGraph(nx.cycle_graph(3)) + H = nx_parallel.ParallelDiGraph(G) + vitality = nx.closeness_vitality(H) + assert vitality == {0: 4, 1: 4, 2: 4} + + def test_weighted_digraph(self): + G = nx.DiGraph() + nx.add_cycle(G, [0, 1, 2], weight=2) + nx.add_cycle(G, [2, 1, 0], weight=2) + H = nx_parallel.ParallelDiGraph(G) + vitality = nx.closeness_vitality(H, weight="weight") + assert vitality == {0: 8, 1: 8, 2: 8} + + def test_weighted_multidigraph(self): + G = nx.MultiDiGraph() + nx.add_cycle(G, [0, 1, 2], weight=2) + nx.add_cycle(G, [2, 1, 0], weight=2) + H = nx_parallel.ParallelMultiDiGraph(G) + vitality = nx.closeness_vitality(H, weight="weight") + assert vitality == {0: 8, 1: 8, 2: 8} + + def test_disconnecting_graph(self): + """Tests that the closeness vitality of a node whose removal + disconnects the graph is negative infinity. 
+ + """ + G = nx.path_graph(3) + nx_parallel.ParallelGraph(G) + assert nx.closeness_vitality(G, node=1) == -float("inf") diff --git a/nx_parallel/algorithms/tournament.py b/nx_parallel/algorithms/tournament.py new file mode 100644 index 0000000..9d602b4 --- /dev/null +++ b/nx_parallel/algorithms/tournament.py @@ -0,0 +1,211 @@ +from joblib import Parallel, cpu_count, delayed +import networkx as nx +from nx_parallel.algorithms.utils.chunk import chunks +from networkx.algorithms.simple_paths import is_simple_path as is_path + +__all__ = [ + "is_reachable", + "tournament_is_strongly_connected", +] + +"""Identical to networkx implementation""" + + +def index_satisfying(iterable, condition): + return nx.algorithms.tournament.index_satisfying(iterable, condition) + + +"""Identical to networkx implementation""" + + +def is_tournament(G): + return nx.algorithms.tournament.is_tournament(G.originalGraph) + + +"""Identical to networkx implementation""" + + +def hamiltonian_path(G): + return nx.algorithms.tournament.hamiltonian_path(G.originalGraph) + + +"""Identical to networkx implementation""" + + +def random_tournament(n, seed=None): + return nx.algorithms.tournament.random_tournament(n, seed) + + +"""Identical to networkx implementation""" + + +def score_sequence(G): + return nx.algorithms.tournament.score_sequence(G.originalGraph) + + +"""Identical to networkx implementation""" + + +def tournament_matrix(G): + return nx.algorithms.tournament.tournament_matrix(G.originalGraph) + + +def is_reachable(G, s, t): + """Decides whether there is a path from `s` to `t` in the + tournament. + + This function is more theoretically efficient than the reachability + checks than the shortest path algorithms in + :mod:`networkx.algorithms.shortest_paths`. + + The given graph **must** be a tournament, otherwise this function's + behavior is undefined. + + Parameters + ---------- + G : NetworkX graph + A directed graph representing a tournament. + + s : node + A node in the graph. + + t : node + A node in the graph. + + Returns + ------- + bool + Whether there is a path from `s` to `t` in `G`. + + Examples + -------- + >>> from networkx.algorithms import tournament + >>> G = nx.DiGraph([(1, 0), (1, 3), (1, 2), (2, 3), (2, 0), (3, 0)]) + >>> tournament.is_reachable(G, 1, 3) + True + >>> tournament.is_reachable(G, 3, 2) + False + + Notes + ----- + Although this function is more theoretically efficient than the + generic shortest path functions, a speedup requires the use of + parallelism. Though it may in the future, the current implementation + does not use parallelism, thus you may not see much of a speedup. + + This algorithm comes from [1]. + + References + ---------- + .. [1] Tantau, Till. + "A note on the complexity of the reachability problem for + tournaments." + *Electronic Colloquium on Computational Complexity*. 2001. 
+ + """ + + """Subset version of two_neighborhood""" + + def two_neighborhood_subset(G, chunk): + reList = set() + for v in chunk: + reList.update( + { + x + for x in G + if x == v + or x in G[v] + or any(is_path(G.originalGraph, [v, z, x]) for z in G) + } + ) + return reList + + """Identical to networkx helper implementation""" + + def is_closed(G, nodes): + return all(v in G[u] for u in set(G) - nodes for v in nodes) + + """helper to check closure conditions for chunk (iterable) of neighborhoods""" + + def check_closure_subset(chunk): + return all(not (is_closed(G, S) and s in S and t not in S) for S in chunk) + + num_chunks = max(len(G) // cpu_count(), 1) + + # send chunk of vertices to each process (calculating neighborhoods) + node_chunks = list(chunks(G.nodes, num_chunks)) + neighborhoods = Parallel(n_jobs=-1)( + delayed(two_neighborhood_subset)(G, chunk) for chunk in node_chunks + ) + + # send chunk of neighborhoods to each process (checking closure conditions) + neighborhood_chunks = list(chunks(neighborhoods, num_chunks)) + results = Parallel(n_jobs=-1, backend="loky")( + delayed(check_closure_subset)(chunk) for chunk in neighborhood_chunks + ) + return all(results) + + +def tournament_is_strongly_connected(G): + """Decides whether the given tournament is strongly connected. + + This function is more theoretically efficient than the + :func:`~networkx.algorithms.components.is_strongly_connected` + function. + + The given graph **must** be a tournament, otherwise this function's + behavior is undefined. + + Parameters + ---------- + G : NetworkX graph + A directed graph representing a tournament. + + Returns + ------- + bool + Whether the tournament is strongly connected. + + Examples + -------- + >>> from networkx.algorithms import tournament + >>> G = nx.DiGraph([(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3), (3, 0)]) + >>> tournament.is_strongly_connected(G) + True + >>> G.remove_edge(1, 3) + >>> tournament.is_strongly_connected(G) + False + + Notes + ----- + Although this function is more theoretically efficient than the + generic strong connectivity function, a speedup requires the use of + parallelism. Though it may in the future, the current implementation + does not use parallelism, thus you may not see much of a speedup. + + This algorithm comes from [1]. + + References + ---------- + .. [1] Tantau, Till. + "A note on the complexity of the reachability problem for + tournaments." + *Electronic Colloquium on Computational Complexity*. 2001. 
+ + + """ + + """Subset version of is_reachable""" + + def is_reachable_subset(G, chunk): + re = set() + for v in chunk: + re.update(is_reachable(G, u, v) for u in G) + return all(re) + + num_chunks = max(len(G) // cpu_count(), 1) + node_chunks = list(chunks(G.nodes, num_chunks)) + results = Parallel(n_jobs=-1)( + delayed(is_reachable_subset)(G, chunk) for chunk in node_chunks + ) + return all(results) diff --git a/nx_parallel/algorithms/utils/__init__.py b/nx_parallel/algorithms/utils/__init__.py new file mode 100644 index 0000000..253f731 --- /dev/null +++ b/nx_parallel/algorithms/utils/__init__.py @@ -0,0 +1 @@ +from .chunk import * diff --git a/nx_parallel/algorithms/utils/chunk.py b/nx_parallel/algorithms/utils/chunk.py new file mode 100644 index 0000000..0167899 --- /dev/null +++ b/nx_parallel/algorithms/utils/chunk.py @@ -0,0 +1,10 @@ +import itertools + +"""Divides an iterable into chunks of size n""" +def chunks(l, n): + l_c = iter(l) + while True: + x = tuple(itertools.islice(l_c, n)) + if not x: + return + yield x \ No newline at end of file diff --git a/nx_parallel/algorithms/vitality.py b/nx_parallel/algorithms/vitality.py new file mode 100644 index 0000000..9cbd64a --- /dev/null +++ b/nx_parallel/algorithms/vitality.py @@ -0,0 +1,74 @@ +from functools import partial +from joblib import Parallel, delayed +import networkx as nx +from nx_parallel.classes.graph import ParallelGraph, ParallelDiGraph,ParallelMultiDiGraph, ParallelMultiGraph + +__all__ = ["closeness_vitality"] + +def closeness_vitality(G, node=None, weight=None, wiener_index=None): + """Returns the closeness vitality for nodes in the graph. Parallel implementation. + + The *closeness vitality* of a node, defined in Section 3.6.2 of [1], + is the change in the sum of distances between all node pairs when + excluding that node. + + Parameters + ---------- + G : NetworkX graph + A strongly-connected graph. + + weight : string + The name of the edge attribute used as weight. This is passed + directly to the :func:`~networkx.wiener_index` function. + + node : object + If specified, only the closeness vitality for this node will be + returned. Otherwise, a dictionary mapping each node to its + closeness vitality will be returned. + + Other parameters + ---------------- + wiener_index : number + If you have already computed the Wiener index of the graph + `G`, you can provide that value here. Otherwise, it will be + computed for you. + + Returns + ------- + dictionary or float + If `node` is None, this function returns a dictionary + with nodes as keys and closeness vitality as the + value. Otherwise, it returns only the closeness vitality for the + specified `node`. + + The closeness vitality of a node may be negative infinity if + removing that node would disconnect the graph. + + Examples + -------- + >>> G = nx.cycle_graph(3) + >>> nx.closeness_vitality(G) + {0: 2.0, 1: 2.0, 2: 2.0} + + See Also + -------- + closeness_centrality + + References + ---------- + .. [1] Ulrik Brandes, Thomas Erlebach (eds.). + *Network Analysis: Methodological Foundations*. + Springer, 2005. 
+ + + """ + if wiener_index is None: + wiener_index = nx.wiener_index(G.originalGraph, weight=weight) + if node is not None: + after = nx.wiener_index(G.subgraph(set(G) - {node}), weight=weight) + return wiener_index - after + vitality = partial(closeness_vitality, G.originalGraph, weight=weight, wiener_index=wiener_index) + result = Parallel(n_jobs=-1)( + delayed(lambda v: (v, vitality(v)))(v) for v in G.originalGraph + ) + return dict(result) diff --git a/nx_parallel/centrality.py b/nx_parallel/centrality.py deleted file mode 100644 index abba84c..0000000 --- a/nx_parallel/centrality.py +++ /dev/null @@ -1,44 +0,0 @@ -from joblib import Parallel, delayed -from networkx.algorithms.centrality.betweenness import ( - _single_source_shortest_path_basic, - _accumulate_endpoints, - _accumulate_basic, - _rescale, - _single_source_dijkstra_path_basic, -) - -__all__ = ["betweenness_centrality"] - - -def betweenness_centrality( - G, k=None, normalized=True, weight=None, endpoints=False, seed=None -): - betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G - if k is None: - nodes = G - else: - nodes = seed.sample(list(G.nodes()), k) - - def __node_loop(nodes): - for s in nodes: - # single source shortest paths - if weight is None: # use BFS - S, P, sigma, _ = _single_source_shortest_path_basic(G, s) - else: # use Dijkstra's algorithm - S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight) - # accumulation - if endpoints: - betweenness, _ = _accumulate_endpoints(betweenness, S, P, sigma, s) - else: - betweenness, _ = _accumulate_basic(betweenness, S, P, sigma, s) - - # rescaling - betweenness = _rescale( - betweenness, - len(G), - normalized=normalized, - directed=G.is_directed(), - k=k, - endpoints=endpoints, - ) - return betweenness diff --git a/nx_parallel/classes/__init__.py b/nx_parallel/classes/__init__.py new file mode 100644 index 0000000..84a54e6 --- /dev/null +++ b/nx_parallel/classes/__init__.py @@ -0,0 +1 @@ +from .graph import * diff --git a/nx_parallel/classes/graph.py b/nx_parallel/classes/graph.py new file mode 100644 index 0000000..cd3329d --- /dev/null +++ b/nx_parallel/classes/graph.py @@ -0,0 +1,52 @@ +from networkx import Graph, DiGraph, MultiDiGraph, MultiGraph + +__all__ = [ + "ParallelGraph", + "ParallelDiGraph", + "ParallelMultiDiGraph", + "ParallelMultiGraph", +] + + +class ParallelGraph(Graph): + __networkx_plugin__ = "parallel" + + def __init__(self, incoming_graph_data=None, **attr): + super().__init__(incoming_graph_data, **attr) + self.originalGraph = Graph(self) + + def to_networkx(self): + return Graph(self) + + +class ParallelDiGraph(DiGraph): + __networkx_plugin__ = "parallel" + + def __init__(self, incoming_graph_data=None, **attr): + super().__init__(incoming_graph_data, **attr) + self.originalGraph = DiGraph(self) + + def to_networkx(self): + return DiGraph(self) + + +class ParallelMultiGraph(MultiGraph): + __networkx_plugin__ = "parallel" + + def __init__(self, incoming_graph_data=None, **attr): + super().__init__(incoming_graph_data, **attr) + self.originalGraph = MultiGraph(self) + + def to_networkx(self): + return MultiGraph(self) + + +class ParallelMultiDiGraph(MultiDiGraph): + __networkx_plugin__ = "parallel" + + def __init__(self, incoming_graph_data=None, **attr): + super().__init__(incoming_graph_data, **attr) + self.originalGraph = MultiDiGraph(self) + + def to_networkx(self): + return MultiDiGraph(self) diff --git a/nx_parallel/graph.py b/nx_parallel/graph.py deleted file mode 100644 index b098bbe..0000000 --- a/nx_parallel/graph.py +++ 
/dev/null @@ -1,13 +0,0 @@ -from networkx import Graph - -__all__ = ["ParallelGraph"] - - -class ParallelGraph(Graph): - __networkx_plugin__ = "parallel" - - def __init__(self, incoming_graph_data=None, **attr): - super().__init__(incoming_graph_data, **attr) - - def to_networkx(self): - return Graph(self) diff --git a/nx_parallel/interface.py b/nx_parallel/interface.py index 996ff3e..20bc393 100644 --- a/nx_parallel/interface.py +++ b/nx_parallel/interface.py @@ -1,42 +1,181 @@ -from .centrality import betweenness_centrality +import networkx as nx +from networkx import DiGraph, Graph, MultiDiGraph, MultiGraph +from .classes.graph import ( + ParallelGraph, + ParallelDiGraph, + ParallelMultiDiGraph, + ParallelMultiGraph, +) +from .algorithms.centrality.betweenness import betweenness_centrality +from .algorithms.isolate import number_of_isolates, isolates, is_isolate +from .algorithms.vitality import closeness_vitality +from .algorithms.tournament import ( + hamiltonian_path, + is_reachable, + tournament_is_strongly_connected, + is_tournament, + random_tournament, + score_sequence, + tournament_matrix, +) +from .algorithms.efficiency_measures import ( + efficiency, + local_efficiency, + global_efficiency, +) + __all__ = ["Dispatcher"] +def convert(graph): + if isinstance(graph, MultiDiGraph): + return ParallelMultiDiGraph(graph) + if isinstance(graph, MultiGraph): + return ParallelMultiGraph(graph) + if isinstance(graph, DiGraph): + return ParallelDiGraph(graph) + if isinstance(graph, Graph): + return ParallelGraph(graph) + raise TypeError(f"Unsupported type of graph: {type(graph)}") + + class Dispatcher: + # ============================= + + # Isolates + number_of_isolates = number_of_isolates + isolates = isolates + is_isolate = is_isolate + + # Vitality + closeness_vitality = closeness_vitality + + # Tournament + is_tournament = is_tournament + hamiltonian_path = hamiltonian_path + random_tournament = random_tournament + score_sequence = score_sequence + tournament_matrix = tournament_matrix + is_reachable = is_reachable + tournament_is_strongly_connected = tournament_is_strongly_connected + + # Centrality betweenness_centrality = betweenness_centrality + # Efficiency + efficiency = efficiency + local_efficiency = local_efficiency + global_efficiency = global_efficiency + + # ============================= + def __getattr__(self, item): + try: + return nx.utils.backends._registered_algorithms[item].__wrapped__ + except KeyError: + raise AttributeError(item) from None + @staticmethod - def convert_from_nx(incoming_graph, weight=None, *, name=None): - import networkx as nx - from .graph import ParallelGraph + def convert_from_nx( + graph, + *, + edge_attrs=None, + node_attrs=None, + preserve_edge_attrs=None, + preserve_node_attrs=None, + preserve_graph_attrs=None, + name=None, + graph_name=None, + ): + if name in { + # Raise if input graph changes + "lexicographical_topological_sort", + "topological_generations", + "topological_sort", + # Sensitive tests (iteration order matters) + "dfs_labeled_edges", + }: + return graph + if not isinstance(graph, Graph): + if name == "is_partition": + # May be NodeView + return graph + raise TypeError( + f"Bad type for graph argument {graph_name} in {name}: {type(graph)}" + ) + + G = graph.__class__() + + if preserve_graph_attrs: + G.graph.update(graph.graph) - if isinstance(incoming_graph, nx.Graph): - return ParallelGraph(incoming_graph) - raise TypeError(f"Unsupported type of graph: {type(incoming_graph)}") + if preserve_node_attrs: + 
G.add_nodes_from(graph.nodes(data=True)) + elif node_attrs: + G.add_nodes_from( + ( + node, + { + k: datadict.get(k, default) + for k, default in node_attrs.items() + if default is not None or k in datadict + }, + ) + for node, datadict in graph.nodes(data=True) + ) + else: + G.add_nodes_from(graph) + + if graph.is_multigraph(): + if preserve_edge_attrs: + G.add_edges_from( + (u, v, key, datadict) + for u, nbrs in graph._adj.items() + for v, keydict in nbrs.items() + for key, datadict in keydict.items() + ) + elif edge_attrs: + G.add_edges_from( + ( + u, + v, + key, + { + k: datadict.get(k, default) + for k, default in edge_attrs.items() + if default is not None or k in datadict + }, + ) + for u, nbrs in graph._adj.items() + for v, keydict in nbrs.items() + for key, datadict in keydict.items() + ) + else: + G.add_edges_from( + (u, v, key, {}) + for u, nbrs in graph._adj.items() + for v, keydict in nbrs.items() + for key, datadict in keydict.items() + ) + elif preserve_edge_attrs: + G.add_edges_from(graph.edges(data=True)) + elif edge_attrs: + G.add_edges_from( + ( + u, + v, + { + k: datadict.get(k, default) + for k, default in edge_attrs.items() + if default is not None or k in datadict + }, + ) + for u, v, datadict in graph.edges(data=True) + ) + else: + G.add_edges_from(graph.edges) + return G @staticmethod def convert_to_nx(obj, *, name=None): - from .graph import ParallelGraph - - if isinstance(obj, ParallelGraph): - obj = obj.to_networkx() return obj - - # @staticmethod - # def on_start_tests(items): - # try: - # import pytest - # except ImportError: # pragma: no cover (import) - # return - # skip = [ - # ("test_attributes", {"TestBoruvka", "test_mst.py"}), - # ("test_weight_attribute", {"TestBoruvka", "test_mst.py"}), - # ] - # for item in items: - # kset = set(item.keywords) - # for test_name, keywords in skip: - # if item.name == test_name and keywords.issubset(kset): - # item.add_marker( - # pytest.mark.xfail(reason="unable to handle multi-attributed graphs") - # ) diff --git a/pyproject.toml b/pyproject.toml index d9a844a..8eaca90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,9 @@ requires = ["setuptools", "setuptools-scm"] build-backend = "setuptools.build_meta" +[tool.setuptools] +py-modules = [] + [project] name = "nx_parallel" authors = [ diff --git a/timing/heatmap_all_functions.png b/timing/heatmap_all_functions.png new file mode 100644 index 0000000..dec0613 Binary files /dev/null and b/timing/heatmap_all_functions.png differ diff --git a/timing/heatmap_betweenness_centrality_timing.png b/timing/heatmap_betweenness_centrality_timing.png new file mode 100644 index 0000000..4913615 Binary files /dev/null and b/timing/heatmap_betweenness_centrality_timing.png differ diff --git a/timing/heatmap_closeness_vitality_timing.png b/timing/heatmap_closeness_vitality_timing.png new file mode 100644 index 0000000..128a788 Binary files /dev/null and b/timing/heatmap_closeness_vitality_timing.png differ diff --git a/timing/heatmap_is_reachable_timing.png b/timing/heatmap_is_reachable_timing.png new file mode 100644 index 0000000..47f3a51 Binary files /dev/null and b/timing/heatmap_is_reachable_timing.png differ diff --git a/timing/heatmap_local_efficiency_timing.png b/timing/heatmap_local_efficiency_timing.png new file mode 100644 index 0000000..97d9bcd Binary files /dev/null and b/timing/heatmap_local_efficiency_timing.png differ diff --git a/timing/timing_all_functions.py b/timing/timing_all_functions.py new file mode 100644 index 0000000..ff282ca --- /dev/null +++ 
b/timing/timing_all_functions.py @@ -0,0 +1,77 @@ +from matplotlib import pyplot as plt +import networkx as nx +import nx_parallel +import time +import seaborn as sns +import pandas as pd + +# Code to create README heatmap for all functions in function_list +heatmapDF = pd.DataFrame() +function_list = [nx.betweenness_centrality, nx.closeness_vitality, nx.local_efficiency] +number_of_nodes_list = [10, 20, 50, 300, 600] + +for i in range(0, len(function_list)): + currFun = function_list[i] + for j in range(0, len(number_of_nodes_list)): + num = number_of_nodes_list[j] + + # create original and parallel graphs + G = nx.fast_gnp_random_graph(num, 0.5, directed=False) + H = nx_parallel.ParallelGraph(G) + + # time both versions and update heatmapDF + t1 = time.time() + c = currFun(H) + t2 = time.time() + parallelTime = t2 - t1 + t1 = time.time() + c = currFun(G) + t2 = time.time() + stdTime = t2 - t1 + timesFaster = stdTime / parallelTime + heatmapDF.at[j, i] = timesFaster + print("Finished " + str(currFun)) + +# Code to create the heatmap row specifically for tournaments +for j in range(0, len(number_of_nodes_list)): + num = number_of_nodes_list[j] + G = nx.tournament.random_tournament(num) + H = nx_parallel.ParallelDiGraph(G) + t1 = time.time() + c = nx.tournament.is_reachable(H, 1, num - 1) # nodes are labeled 0..num-1 + t2 = time.time() + parallelTime = t2 - t1 + t1 = time.time() + c = nx.tournament.is_reachable(G, 1, num - 1) + t2 = time.time() + stdTime = t2 - t1 + timesFaster = stdTime / parallelTime + heatmapDF.at[j, 3] = timesFaster + +# plotting the heatmap with numbers and a green color scheme +plt.figure(figsize=(20, 4)) +hm = sns.heatmap(data=heatmapDF.T, annot=True, cmap="Greens", cbar=True) + +# Adding y-axis labels +hm.set_yticklabels( + [ + "betweenness_centrality", + "closeness_vitality", + "local_efficiency", + "tournament is_reachable", + ] +) + +# Adding x-axis labels +hm.set_xticklabels(number_of_nodes_list) + +# Rotating the x-axis labels for better readability (optional) +plt.xticks(rotation=45) +plt.yticks(rotation=20) +plt.title("Small Scale Demo: Times Speedups of nx_parallel compared to networkx") +plt.xlabel("Number of Vertices (edge probability of 0.5 except for tournaments)") +plt.ylabel("Algorithm") + +# saving the plotted heatmap +plt.tight_layout() +plt.savefig("timing/heatmap_all_functions.png") diff --git a/timing/timing_comparison.md b/timing/timing_comparison.md new file mode 100644 index 0000000..cbbd0ae --- /dev/null +++ b/timing/timing_comparison.md @@ -0,0 +1,29 @@ +Timing Comparisons + +--- + +Model: 13-inch MacBook Pro (2020) + +CPU: 2 GHz Quad-Core Intel Core i5 + +RAM: 16 GB LPDDR4X at 3733 MHz + +Code to generate the heatmaps is in timing_individual_function.py and timing_all_functions.py. 
+ +### All parallelized functions at this time: + +![alt text](heatmap_all_functions.png) + +### Individual functions: + +betweenness_centrality +![alt text](heatmap_betweenness_centrality_timing.png) + +closeness_vitality +![alt text](heatmap_closeness_vitality_timing.png) + +local_efficiency +![alt text](heatmap_local_efficiency_timing.png) + +tournament is_reachable +![alt text](heatmap_is_reachable_timing.png) diff --git a/timing/timing_individual_function.py b/timing/timing_individual_function.py new file mode 100644 index 0000000..ac9a816 --- /dev/null +++ b/timing/timing_individual_function.py @@ -0,0 +1,75 @@ +from matplotlib import pyplot as plt +import networkx as nx +import nx_parallel +import time +import seaborn as sns +import pandas as pd + +# Code to create README heatmaps for individual function currFun +heatmapDF = pd.DataFrame() +number_of_nodes_list = [10, 50, 100, 300, 500] +pList = [1, 0.8, 0.6, 0.4, 0.2] +currFun = nx.betweenness_centrality +for i in range(0, len(pList)): + p = pList[i] + for j in range(0, len(number_of_nodes_list)): + num = number_of_nodes_list[j] + + # create original and parallel graphs + G = nx.fast_gnp_random_graph(num, p, directed=False) + H = nx_parallel.ParallelGraph(G) + + # time both versions and update heatmapDF + t1 = time.time() + c = currFun(H) + t2 = time.time() + parallelTime = t2 - t1 + t1 = time.time() + c = currFun(G) + t2 = time.time() + stdTime = t2 - t1 + timesFaster = stdTime / parallelTime + heatmapDF.at[j, i] = timesFaster + print("Finished " + str(currFun)) + +# Code to create the heatmap row specifically for tournaments +# for i in range(0, len(pList)): +# p = pList[i] +# for j in range(0, len(number_of_nodes_list)): +# num = number_of_nodes_list[j] +# G = nx.tournament.random_tournament(num) +# H = nx_parallel.ParallelDiGraph(G) +# t1 = time.time() +# c = nx.tournament.is_reachable(H, 1, num - 1) +# t2 = time.time() +# parallelTime = t2-t1 +# t1 = time.time() +# c = nx.tournament.is_reachable(G, 1, num - 1) +# t2 = time.time() +# stdTime = t2-t1 +# timesFaster = stdTime/parallelTime +# heatmapDF.at[j, 3] = timesFaster + +# plotting the heatmap with numbers and a green color scheme +plt.figure(figsize=(20, 4)) +hm = sns.heatmap(data=heatmapDF.T, annot=True, cmap="Greens", cbar=True) + +# Adding y-axis labels +hm.set_yticklabels(pList) + +# Adding x-axis labels +hm.set_xticklabels(number_of_nodes_list) + +# Rotating the x-axis labels for better readability (optional) +plt.xticks(rotation=45) +plt.yticks(rotation=20) +plt.title( + "Small Scale Demo: Times Speedups of " + currFun.__name__ + " compared to networkx" +) +plt.xlabel("Number of Vertices") +plt.ylabel("Edge Probability") +print(currFun.__name__) + +# saving the plotted heatmap +plt.tight_layout() +plt.savefig("timing/" + "heatmap_" + currFun.__name__ + "_timing.png")
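Reviewer note: for anyone trying the branch locally, here is a minimal end-to-end smoke test of the backend dispatch, assuming this PR is installed next to networkx `main` as in the CI workflow above (graph size, seed, and tolerance are arbitrary choices, not part of the PR).

```python
import networkx as nx
import nx_parallel

G = nx.fast_gnp_random_graph(200, 0.5, seed=42)
H = nx_parallel.ParallelGraph(G)

# The wrapped graph carries __networkx_plugin__ = "parallel", so networkx
# dispatches this call to nx_parallel's implementation; the plain graph
# runs the stock sequential code path.
parallel_bc = nx.betweenness_centrality(H)
stock_bc = nx.betweenness_centrality(G)

# The tests above use abs=1e-7; summation order differs across chunks.
assert all(abs(parallel_bc[n] - stock_bc[n]) < 1e-7 for n in G)
print("parallel backend matches networkx on", len(G), "nodes")
```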