diff --git a/README.md b/README.md index e9691fa..66c6363 100644 --- a/README.md +++ b/README.md @@ -20,9 +20,9 @@ nx-parallel is a NetworkX backend that uses joblib for parallelization. This pro ```.py import _nx_parallel as nxp -d = nxp.get_info() -for func in d.get("functions", {}): - print(f"- [{func}]({d['functions'][func]['url']})") +d = nxp.get_funcs_info() # temporarily add `from .update_get_info import *` to _nx_parallel/__init__.py +for func in d: + print(f"- [{func}]({d[func]['url']})") ``` diff --git a/_nx_parallel/__init__.py b/_nx_parallel/__init__.py index 9657a87..2543c45 100644 --- a/_nx_parallel/__init__.py +++ b/_nx_parallel/__init__.py @@ -1,5 +1,4 @@ -import os -import ast +# This file was automatically generated by update_get_info.py def get_info(): @@ -10,114 +9,120 @@ def get_info(): "package": "nx_parallel", "url": "https://github.com/networkx/nx-parallel", "short_summary": "Parallel backend for NetworkX algorithms", - "functions": get_funcs_info(), + "functions": { + "number_of_isolates": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/isolate.py#L8", + "additional_docs": "The parallel computation is implemented by dividing the list of isolated nodes into chunks and then finding the length of each chunk in parallel and then adding all the lengths at the end.", + "additional_parameters": None, + }, + "square_clustering": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/cluster.py#L10", + "additional_docs": "The nodes are chunked into `node_chunks` and then the square clustering coefficient for all `node_chunks` are computed in parallel over all available CPU cores.", + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes (or nbunch) as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n` chunks, where `n` is the number of CPU cores." 
+ }, + }, + "local_efficiency": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/efficiency_measures.py#L9", + "additional_docs": "The parallel computation is implemented by dividing the nodes into chunks and then computing and adding global efficiencies of all node in all chunks, in parallel, and then adding all these sums and dividing by the total number of nodes at the end.", + "additional_parameters": None, + }, + "closeness_vitality": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/vitality.py#L9", + "additional_docs": "The parallel computation is implemented only when the node is not specified. The closeness vitality for each node is computed concurrently.", + "additional_parameters": None, + }, + "is_reachable": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/tournament.py#L10", + "additional_docs": "The function parallelizes the calculation of two neighborhoods of vertices in `G` and checks closure conditions for each neighborhood subset in parallel.", + "additional_parameters": None, + }, + "is_strongly_connected": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/tournament.py#L54", + "additional_docs": "The parallel computation is implemented by dividing the nodes into chunks and then checking whether each node is reachable from each other node in parallel.", + "additional_parameters": None, + }, + "all_pairs_node_connectivity": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/approximation/connectivity.py#L12", + "additional_docs": "The parallel implementation first divides the a list of all permutation (in case of directed graphs) and combinations (in case of undirected graphs) of `nbunch` into chunks and then creates a generator to lazily compute the local node connectivities for each chunk, and then employs joblib's `Parallel` function to execute these computations in 
parallel across all available CPU cores. At the end, the results are aggregated into a single dictionary and returned.", + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in `list(iter_func(nbunch, 2))` as input and returns an iterable `pairs_chunks`, here `iter_func` is `permutations` in case of directed graphs and `combinations` in case of undirected graphs. The default is to create chunks by slicing the list into `n` chunks, where `n` is the number of CPU cores, such that size of each chunk is atmost 10, and at least 1." + }, + }, + "betweenness_centrality": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/centrality/betweenness.py#L16", + "additional_docs": "The parallel computation is implemented by dividing the nodes into chunks and computing betweenness centrality for each chunk concurrently.", + "additional_parameters": None, + }, + "node_redundancy": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/bipartite/redundancy.py#L11", + "additional_docs": "In the parallel implementation we divide the nodes into chunks and compute the node redundancy coefficients for all `node_chunk` in parallel.", + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` (or `nodes`) into `n` chunks, where `n` is the number of CPU cores." 
+ }, + }, + "all_pairs_dijkstra": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/weighted.py#L28", + "additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute shortest paths and lengths for each `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.", + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores." + }, + }, + "all_pairs_dijkstra_path_length": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/weighted.py#L71", + "additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute shortest paths lengths for each node in `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.", + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores." 
+ }, + }, + "all_pairs_dijkstra_path": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/weighted.py#L121", + "additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute shortest paths for each `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.", + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores." + }, + }, + "all_pairs_bellman_ford_path_length": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/weighted.py#L164", + "additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute shortest paths lengths for each node in `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.", + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores." 
+ }, + }, + "all_pairs_bellman_ford_path": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/weighted.py#L209", + "additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute shortest paths for each node_chunk, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.", + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores." + }, + }, + "johnson": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/weighted.py#L252", + "additional_docs": "The parallel computation is implemented by dividing the nodes into chunks and computing the shortest paths using Johnson's Algorithm for each chunk in parallel.", + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores." 
+ }, + }, + "all_pairs_all_shortest_paths": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/generic.py#L10", + "additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute all shortest paths between all nodes for each node in `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.", + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores." + }, + }, + "all_pairs_shortest_path_length": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/unweighted.py#L18", + "additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute shortest paths lengths for each node in `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.", + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores." 
+ }, + }, + "all_pairs_shortest_path": { + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/unweighted.py#L62", + "additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute shortest paths for each `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.", + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores." + }, + }, + }, } - - -def get_funcs_info(): - """Return a dictionary with information about all the functions.""" - funcs = {} - - nx_parallel_dir = os.path.join(os.getcwd(), "nx_parallel") - for root, dirs, files in os.walk(nx_parallel_dir): - for file in files: - if ( - file.endswith(".py") - and file != "__init__.py" - and not file.startswith("test_") - ): - path = os.path.join(root, file) - d = extract_docstrings_from_file(path) - for func in d: - par_docs, par_params = extract_from_docs(d[func]) - funcs[func] = { - "url": get_url(path, func), - "additional_docs": par_docs, - "additional_parameters": par_params, - } - return funcs - - -def extract_docstrings_from_file(file_path): - """ - Extract docstrings from functions listed in the __all__ list of a Python file. - - Args: - - file_path: The path to the Python file. - - Returns: - - A dictionary mapping function names to their docstrings. 
- """ - docstrings = {} - with open(file_path, "r") as f: - tree = ast.parse(f.read(), filename=file_path) - all_list = None - for node in tree.body: - if isinstance(node, ast.Assign): - if ( - isinstance(node.targets[0], ast.Name) - and node.targets[0].id == "__all__" - ): - all_list = [ - expr.s for expr in node.value.elts if isinstance(expr, ast.Str) - ] - elif isinstance(node, ast.FunctionDef): - if all_list and node.name in all_list: - docstring = ast.get_docstring(node) or "No docstring found." - docstrings[node.name] = docstring - return docstrings - - -def extract_from_docs(docstring): - """Extract the parallel documentation and parallel parameter description from the given doctring.""" - try: - # Extracting Parallel Computation description - # Assuming that the first para in docstring is the function's PC desc - # "par" is short for "parallel" - par_docs_ = docstring.split("\n\n")[0] - par_docs_ = par_docs_.split("\n") - par_docs_ = [line.strip() for line in par_docs_ if line.strip()] - par_docs = "\n".join(par_docs_) - except IndexError: - par_docs = None - except Exception as e: - print(e) - par_docs = None - - try: - # Extracting extra parameters - # Assuming that the last para in docstring is the function's extra params - par_params = {} - par_params_ = docstring.split("------------\n")[1] - - par_params_ = par_params_.split("\n\n\n") - for i in par_params_: - j = i.split("\n") - par_params[j[0]] = "\n".join( - [line.strip() for line in j[1:] if line.strip()] - ) - if i == par_params_[-1]: - par_params[j[0]] = "\n".join( - [line.strip() for line in j[1:-1] if line.strip()] - ) - except IndexError: - par_params = None - except Exception as e: - print(e) - par_params = None - return par_docs, par_params - - -def get_url(file_path, function_name): - """Return the URL to the given function in the given file.""" - file_url = ( - "https://github.com/networkx/nx-parallel/blob/main/nx_parallel" - + file_path.split("nx_parallel")[-1] - + "#L" - ) - with 
open(file_path, "r") as f: - tree = ast.parse(f.read(), filename=file_path) - for node in ast.walk(tree): - if isinstance(node, ast.FunctionDef) and node.name == function_name: - return file_url + str(node.lineno) - return file_url diff --git a/_nx_parallel/script.sh b/_nx_parallel/script.sh new file mode 100644 index 0000000..e57323d --- /dev/null +++ b/_nx_parallel/script.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +python _nx_parallel/update_get_info.py + +ruff format "_nx_parallel/temp__init__.py" + +# Check if there's any difference between the original file and the formatted one +if diff -q "_nx_parallel/__init__.py" "_nx_parallel/temp__init__.py" >/dev/null; then + rm "_nx_parallel/temp__init__.py" +else + mv "_nx_parallel/temp__init__.py" "_nx_parallel/__init__.py" +fi diff --git a/_nx_parallel/update_get_info.py b/_nx_parallel/update_get_info.py new file mode 100644 index 0000000..6259105 --- /dev/null +++ b/_nx_parallel/update_get_info.py @@ -0,0 +1,136 @@ +import os +import ast + +__all__ = ["get_funcs_info", "extract_docstrings_from_file", "extract_from_docs"] + +# Helper functions for get_info + + +def get_funcs_info(): + """Return a dictionary with information about all the functions.""" + funcs = {} + + nx_parallel_dir = os.path.join(os.getcwd(), "nx_parallel") + for root, dirs, files in os.walk(nx_parallel_dir): + for file in files: + if ( + file.endswith(".py") + and file != "__init__.py" + and not file.startswith("test_") + ): + path = os.path.join(root, file) + d = extract_docstrings_from_file(path) + for func in d: + par_docs, par_params = extract_from_docs(d[func]) + funcs[func] = { + "url": get_url(path, func), + "additional_docs": par_docs, + "additional_parameters": par_params, + } + return funcs + + +def extract_docstrings_from_file(file_path): + """ + Extract docstrings from functions listed in the __all__ list of a Python file. + + Args: + - file_path: The path to the Python file. 
+
+    Returns:
+    - A dictionary mapping function names to their docstrings.
+    """
+    docstrings = {}
+    with open(file_path, "r") as f:
+        tree = ast.parse(f.read(), filename=file_path)
+        all_list = None
+        for node in tree.body:
+            if isinstance(node, ast.Assign):
+                if (
+                    isinstance(node.targets[0], ast.Name)
+                    and node.targets[0].id == "__all__"
+                ):
+                    all_list = [
+                        e.value for e in node.value.elts if isinstance(e, ast.Constant)
+                    ]
+            elif isinstance(node, ast.FunctionDef):
+                if all_list and node.name in all_list:
+                    docstring = ast.get_docstring(node) or "No docstring found."
+                    docstrings[node.name] = docstring
+    return docstrings
+
+
+def extract_from_docs(docstring):
+    """Extract the parallel documentation and parallel parameter description from the given docstring."""
+    try:
+        # Extracting Parallel Computation description
+        # Assuming that the first para in docstring is the function's PC desc
+        # "par" is short for "parallel"
+        par_docs_ = docstring.split("\n\n")[0]
+        par_docs_ = par_docs_.split("\n")
+        par_docs_ = [line.strip() for line in par_docs_ if line.strip()]
+        par_docs = " ".join(par_docs_)
+        par_docs = par_docs.replace("\n", " ")
+    except IndexError:
+        par_docs = None
+    except Exception as e:
+        print(e)
+        par_docs = None
+
+    try:
+        # Extracting extra parameters
+        # Assuming that the last para in docstring is the function's extra params
+        par_params = {}
+        par_params_ = docstring.split("------------\n")[1]
+
+        par_params_ = par_params_.split("\n\n\n")
+        for i in par_params_:
+            j = i.split("\n")
+            par_params[j[0]] = "\n".join(
+                [line.strip() for line in j[1:] if line.strip()]
+            )
+            if i == par_params_[-1]:
+                par_params[j[0]] = " ".join(
+                    [line.strip() for line in j[1:-1] if line.strip()]
+                )
+            par_params[j[0]] = par_params[j[0]].replace("\n", " ")
+    except IndexError:
+        par_params = None
+    except Exception as e:
+        print(e)
+        par_params = None
+    return par_docs, par_params
+
+
+def get_url(file_path, function_name):
+    """Return the URL to the given function in the given
file.""" + file_url = ( + "https://github.com/networkx/nx-parallel/blob/main/nx_parallel" + + file_path.split("nx_parallel")[-1] + + "#L" + ) + with open(file_path, "r") as f: + tree = ast.parse(f.read(), filename=file_path) + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef) and node.name == function_name: + return file_url + str(node.lineno) + return file_url + + +# Creating a temp__init__.py file + +string = '''# This file was automatically generated by update_get_info.py + + +def get_info(): + """Return a dictionary with information about the package.""" + return { + "backend_name": "parallel", + "project": "nx-parallel", + "package": "nx_parallel", + "url": "https://github.com/networkx/nx-parallel", + "short_summary": "Parallel backend for NetworkX algorithms", + "functions": ''' + +with open("_nx_parallel/temp__init__.py", "w") as f: + f.write(string + str(get_funcs_info()) + "}\n") diff --git a/pyproject.toml b/pyproject.toml index baf42f2..e6d0149 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,5 +43,5 @@ packages = ["_nx_parallel", "nx_parallel",] line-length = 88 target-version = 'py310' -[tool.ruff.per-file-ignores] -"__init__.py" = ['I', 'F403'] +[tool.ruff.lint] +per-file-ignores = { "__init__.py" = ['I', 'F403'] }