
added update script and things from PR 55 (#57)
Schefflera-Arboricola authored Apr 2, 2024
1 parent 21d4158 commit ee40371
Showing 5 changed files with 270 additions and 117 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -20,9 +20,9 @@ nx-parallel is a NetworkX backend that uses joblib for parallelization. This pro

```.py
import _nx_parallel as nxp
-d = nxp.get_info()
-for func in d.get("functions", {}):
-    print(f"- [{func}]({d['functions'][func]['url']})")
+d = nxp.get_funcs_info()  # temporarily add `from .update_get_info import *` to _nx_parallel/__init__.py
+for func in d:
+    print(f"- [{func}]({d[func]['url']})")
```

</details>
229 changes: 117 additions & 112 deletions _nx_parallel/__init__.py
@@ -1,5 +1,4 @@
-import os
-import ast
+# This file was automatically generated by update_get_info.py


def get_info():
@@ -10,114 +9,120 @@ def get_info():
"package": "nx_parallel",
"url": "https://github.com/networkx/nx-parallel",
"short_summary": "Parallel backend for NetworkX algorithms",
"functions": get_funcs_info(),
"functions": {
"number_of_isolates": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/isolate.py#L8",
"additional_docs": "The parallel computation is implemented by dividing the list of isolated nodes into chunks and then finding the length of each chunk in parallel and then adding all the lengths at the end.",
"additional_parameters": None,
},
"square_clustering": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/cluster.py#L10",
"additional_docs": "The nodes are chunked into `node_chunks` and then the square clustering coefficient for all `node_chunks` are computed in parallel over all available CPU cores.",
"additional_parameters": {
'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes (or nbunch) as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n` chunks, where `n` is the number of CPU cores."
},
},
"local_efficiency": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/efficiency_measures.py#L9",
"additional_docs": "The parallel computation is implemented by dividing the nodes into chunks and then computing and adding global efficiencies of all node in all chunks, in parallel, and then adding all these sums and dividing by the total number of nodes at the end.",
"additional_parameters": None,
},
"closeness_vitality": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/vitality.py#L9",
"additional_docs": "The parallel computation is implemented only when the node is not specified. The closeness vitality for each node is computed concurrently.",
"additional_parameters": None,
},
"is_reachable": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/tournament.py#L10",
"additional_docs": "The function parallelizes the calculation of two neighborhoods of vertices in `G` and checks closure conditions for each neighborhood subset in parallel.",
"additional_parameters": None,
},
"is_strongly_connected": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/tournament.py#L54",
"additional_docs": "The parallel computation is implemented by dividing the nodes into chunks and then checking whether each node is reachable from each other node in parallel.",
"additional_parameters": None,
},
"all_pairs_node_connectivity": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/approximation/connectivity.py#L12",
"additional_docs": "The parallel implementation first divides the a list of all permutation (in case of directed graphs) and combinations (in case of undirected graphs) of `nbunch` into chunks and then creates a generator to lazily compute the local node connectivities for each chunk, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores. At the end, the results are aggregated into a single dictionary and returned.",
"additional_parameters": {
'get_chunks : str, function (default = "chunks")': "A function that takes in `list(iter_func(nbunch, 2))` as input and returns an iterable `pairs_chunks`, here `iter_func` is `permutations` in case of directed graphs and `combinations` in case of undirected graphs. The default is to create chunks by slicing the list into `n` chunks, where `n` is the number of CPU cores, such that size of each chunk is atmost 10, and at least 1."
},
},
"betweenness_centrality": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/centrality/betweenness.py#L16",
"additional_docs": "The parallel computation is implemented by dividing the nodes into chunks and computing betweenness centrality for each chunk concurrently.",
"additional_parameters": None,
},
"node_redundancy": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/bipartite/redundancy.py#L11",
"additional_docs": "In the parallel implementation we divide the nodes into chunks and compute the node redundancy coefficients for all `node_chunk` in parallel.",
"additional_parameters": {
'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` (or `nodes`) into `n` chunks, where `n` is the number of CPU cores."
},
},
"all_pairs_dijkstra": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/weighted.py#L28",
"additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute shortest paths and lengths for each `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.",
"additional_parameters": {
'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores."
},
},
"all_pairs_dijkstra_path_length": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/weighted.py#L71",
"additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute shortest paths lengths for each node in `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.",
"additional_parameters": {
'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores."
},
},
"all_pairs_dijkstra_path": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/weighted.py#L121",
"additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute shortest paths for each `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.",
"additional_parameters": {
'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores."
},
},
"all_pairs_bellman_ford_path_length": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/weighted.py#L164",
"additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute shortest paths lengths for each node in `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.",
"additional_parameters": {
'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores."
},
},
"all_pairs_bellman_ford_path": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/weighted.py#L209",
"additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute shortest paths for each node_chunk, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.",
"additional_parameters": {
'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores."
},
},
"johnson": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/weighted.py#L252",
"additional_docs": "The parallel computation is implemented by dividing the nodes into chunks and computing the shortest paths using Johnson's Algorithm for each chunk in parallel.",
"additional_parameters": {
'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores."
},
},
"all_pairs_all_shortest_paths": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/generic.py#L10",
"additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute all shortest paths between all nodes for each node in `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.",
"additional_parameters": {
'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores."
},
},
"all_pairs_shortest_path_length": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/unweighted.py#L18",
"additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute shortest paths lengths for each node in `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.",
"additional_parameters": {
'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores."
},
},
"all_pairs_shortest_path": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/unweighted.py#L62",
"additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute shortest paths for each `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.",
"additional_parameters": {
'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores."
},
},
},
}


-def get_funcs_info():
-    """Return a dictionary with information about all the functions."""
-    funcs = {}
-
-    nx_parallel_dir = os.path.join(os.getcwd(), "nx_parallel")
-    for root, dirs, files in os.walk(nx_parallel_dir):
-        for file in files:
-            if (
-                file.endswith(".py")
-                and file != "__init__.py"
-                and not file.startswith("test_")
-            ):
-                path = os.path.join(root, file)
-                d = extract_docstrings_from_file(path)
-                for func in d:
-                    par_docs, par_params = extract_from_docs(d[func])
-                    funcs[func] = {
-                        "url": get_url(path, func),
-                        "additional_docs": par_docs,
-                        "additional_parameters": par_params,
-                    }
-    return funcs
-
-
-def extract_docstrings_from_file(file_path):
-    """
-    Extract docstrings from functions listed in the __all__ list of a Python file.
-
-    Args:
-    - file_path: The path to the Python file.
-
-    Returns:
-    - A dictionary mapping function names to their docstrings.
-    """
-    docstrings = {}
-    with open(file_path, "r") as f:
-        tree = ast.parse(f.read(), filename=file_path)
-        all_list = None
-        for node in tree.body:
-            if isinstance(node, ast.Assign):
-                if (
-                    isinstance(node.targets[0], ast.Name)
-                    and node.targets[0].id == "__all__"
-                ):
-                    all_list = [
-                        expr.s for expr in node.value.elts if isinstance(expr, ast.Str)
-                    ]
-            elif isinstance(node, ast.FunctionDef):
-                if all_list and node.name in all_list:
-                    docstring = ast.get_docstring(node) or "No docstring found."
-                    docstrings[node.name] = docstring
-    return docstrings
-
-
-def extract_from_docs(docstring):
-    """Extract the parallel documentation and the parallel parameter descriptions from the given docstring."""
-    try:
-        # Extracting the parallel computation description.
-        # Assuming that the first paragraph of the docstring is the function's
-        # parallel computation description ("par" is short for "parallel").
-        par_docs_ = docstring.split("\n\n")[0]
-        par_docs_ = par_docs_.split("\n")
-        par_docs_ = [line.strip() for line in par_docs_ if line.strip()]
-        par_docs = "\n".join(par_docs_)
-    except IndexError:
-        par_docs = None
-    except Exception as e:
-        print(e)
-        par_docs = None
-
-    try:
-        # Extracting the extra parameters.
-        # Assuming that the section after the dashed underline lists the
-        # function's extra parameters.
-        par_params = {}
-        par_params_ = docstring.split("------------\n")[1]
-
-        par_params_ = par_params_.split("\n\n\n")
-        for i in par_params_:
-            j = i.split("\n")
-            par_params[j[0]] = "\n".join(
-                [line.strip() for line in j[1:] if line.strip()]
-            )
-            if i == par_params_[-1]:
-                par_params[j[0]] = "\n".join(
-                    [line.strip() for line in j[1:-1] if line.strip()]
-                )
-    except IndexError:
-        par_params = None
-    except Exception as e:
-        print(e)
-        par_params = None
-    return par_docs, par_params
-
-
-def get_url(file_path, function_name):
-    """Return the URL to the given function in the given file."""
-    file_url = (
-        "https://github.com/networkx/nx-parallel/blob/main/nx_parallel"
-        + file_path.split("nx_parallel")[-1]
-        + "#L"
-    )
-    with open(file_path, "r") as f:
-        tree = ast.parse(f.read(), filename=file_path)
-        for node in ast.walk(tree):
-            if isinstance(node, ast.FunctionDef) and node.name == function_name:
-                return file_url + str(node.lineno)
-    return file_url
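
The helpers removed above now live in `update_get_info.py` (the README snippet's temporary `from .update_get_info import *` import points there). As a worked illustration of the docstring shape `extract_from_docs` assumes, here is a hedged sketch; the sample docstring and its wording are invented, and the expected results appear in comments:

```py
# Hypothetical usage sketch for extract_from_docs, assumed importable from
# _nx_parallel.update_get_info after this commit. The sample docstring is
# invented; it follows the parsing assumptions visible in the code above:
# first paragraph = parallel-computation description, then an extra-parameters
# section after a 12-dash underline, with parameter blocks separated by two
# blank lines.
from _nx_parallel.update_get_info import extract_from_docs

doc = (
    "The nodes are chunked and processed in parallel.\n"
    "\n"
    "Extra parameters\n"
    "------------\n"
    'get_chunks : str, function (default = "chunks")\n'
    "    A function that returns an iterable `node_chunks`.\n"
)

par_docs, par_params = extract_from_docs(doc)
# par_docs == "The nodes are chunked and processed in parallel."
# par_params == {'get_chunks : str, function (default = "chunks")':
#                "A function that returns an iterable `node_chunks`."}
```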
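
Every `get_chunks` entry in the generated dictionary above describes the same contract: a callable that receives an iterable of nodes (or node pairs) and returns an iterable of chunks. A minimal sketch of a custom chunker under that assumption (the function name and chunk size are hypothetical):

```py
import itertools


def chunks_of_size(nodes, chunk_size=100):
    """Yield successive tuples of at most `chunk_size` nodes."""
    it = iter(nodes)
    # Take fixed-size slices until the node iterator is exhausted.
    while chunk := tuple(itertools.islice(it, chunk_size)):
        yield chunk
```

Going by the parameter descriptions above, such a callable would presumably be passed as, e.g., `nxp.square_clustering(G, get_chunks=chunks_of_size)` in place of the default core-count-based slicing.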
12 changes: 12 additions & 0 deletions _nx_parallel/script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+python _nx_parallel/update_get_info.py
+
+ruff format "_nx_parallel/temp__init__.py"
+
+# Check if there's any difference between the original file and the formatted one
+if diff -q "_nx_parallel/__init__.py" "_nx_parallel/temp__init__.py" >/dev/null; then
+    rm "_nx_parallel/temp__init__.py"
+else
+    mv "_nx_parallel/temp__init__.py" "_nx_parallel/__init__.py"
+fi
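
In short, the script regenerates `_nx_parallel/__init__.py`: `update_get_info.py` writes a fresh `temp__init__.py`, `ruff` formats it, and the formatted file replaces `__init__.py` only when it actually differs (otherwise the temporary file is discarded). Since the paths are relative, the script is presumably meant to be run from the repository root, e.g. `sh _nx_parallel/script.sh`.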
