Chore/style cleanup utility functions (#80)
* feat: simplify interface.py, while adding type hints, decorators, and global convenience variables where appropriate

* ref: utility functions; add type hints, optimize memory usage, and enhance docstrings.

* feat: Enhance _configure_if_nx_active decorator: add type hints, optimize config handling, and refine docstrings.

* chore: standardize NX_GTYPE hints, remove dangling import

* fix: circular import errors

* fix: failing tests

* test: add unit tests for utils/chunk.py

* fix: create separate types.py module to avoid an import order in __init__.py that fails ruff formatting

* fix: create separate types.py module to avoid an import order in __init__.py that fails ruff formatting

* chore: pre-commit formatting

* fix: revert to orig

* chore: Add .history to .gitignore

* chore: Make script.sh executable

* chore: revert script.sh from main

* fix(update_get_info): correctly exclude utils functions from __init__.py generation

* feat: remove type annotations from function signatures

* feat: remove type annotations from function signatures

* Update nx_parallel/interface.py

Co-authored-by: Dan Schult <dschult@colgate.edu>

* Update nx_parallel/interface.py

Co-authored-by: Dan Schult <dschult@colgate.edu>

* feat: respond to code review

* chore: commit unstaged changes

* fix(test): update enums to plural

* fix: revert enums to singular to prevent breaking changes

* Update .gitignore

Co-authored-by: Aditi Juneja <91629733+Schefflera-Arboricola@users.noreply.github.com>

* Update nx_parallel/utils/chunk.py

Co-authored-by: Aditi Juneja <91629733+Schefflera-Arboricola@users.noreply.github.com>

* Update nx_parallel/utils/chunk.py

Co-authored-by: Aditi Juneja <91629733+Schefflera-Arboricola@users.noreply.github.com>

* Update nx_parallel/utils/tests/test_chunk.py

Co-authored-by: Aditi Juneja <91629733+Schefflera-Arboricola@users.noreply.github.com>

* Update nx_parallel/utils/chunk.py

Co-authored-by: Aditi Juneja <91629733+Schefflera-Arboricola@users.noreply.github.com>

* chore: run precommit

* chore: run precommit

* feat: remove GraphIteratorType

* feat: remove GraphIteratorType

* feat: remove GraphIteratorType

* chore: docstring spacing

* chore: .gitignore

* chore: .gitignore

* Update nx_parallel/utils/chunk.py

Co-authored-by: Dan Schult <dschult@colgate.edu>

* chore: address styling discrepancies highlighted in code review

* chore: remove .DS_Store

* chore: manually add in ValueError wording change

* chore: manually add in ValueError wording change

* doc: enforce imperative mood in docstring

* chore: rerun precommit

* chore: rerun precommit

* chore: rerun precommit

* chore: rerun precommit

* moved update-get-info hook to the end

---------

Co-authored-by: Derek Alexander <derekpisner@dpisner-MB.local>
Co-authored-by: Dan Schult <dschult@colgate.edu>
Co-authored-by: Aditi Juneja <91629733+Schefflera-Arboricola@users.noreply.github.com>
4 people authored Sep 27, 2024
1 parent c73066f commit 3ebd2a8
Showing 8 changed files with 201 additions and 139 deletions.
14 changes: 7 additions & 7 deletions .pre-commit-config.yaml
@@ -2,13 +2,6 @@
# pre-commit install

repos:
- repo: local
hooks:
- id: update-get_info
name: Update function info
entry: sh _nx_parallel/script.sh
language: system
pass_filenames: false
- repo: https://github.com/adamchainz/blacken-docs
rev: 1.16.0
hooks:
@@ -26,3 +19,10 @@ repos:
args:
- --fix
- id: ruff-format
- repo: local
hooks:
- id: update-get_info
name: Update function info
entry: sh _nx_parallel/script.sh
language: system
pass_filenames: false
2 changes: 1 addition & 1 deletion _nx_parallel/update_get_info.py
@@ -20,7 +20,7 @@ def get_funcs_info():

nx_parallel_dir = os.path.join(os.getcwd(), "nx_parallel")
for root, dirs, files in os.walk(nx_parallel_dir):
if root[-29:] == "nx-parallel/nx_parallel/utils":
if "nx_parallel/utils" in root:
continue
for file in files:
if (
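The path check above was loosened because the old comparison slices the last 29 characters of root, which only matches when the parent of the package directory is named exactly "nx-parallel". A minimal illustration, using a hypothetical checkout path that is not part of the diff:

# Hypothetical clone whose top-level directory is not named "nx-parallel".
root = "/home/user/my-fork/nx_parallel/utils"

# Old check: depends on the checkout directory name.
print(root[-29:] == "nx-parallel/nx_parallel/utils")  # False, so utils would not be skipped

# New check: only requires the package-internal path segment.
print("nx_parallel/utils" in root)  # True, so utils files are skipped as intended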
139 changes: 56 additions & 83 deletions nx_parallel/interface.py
@@ -1,50 +1,57 @@
from nx_parallel.algorithms.bipartite.redundancy import node_redundancy
from nx_parallel.algorithms.centrality.betweenness import (
betweenness_centrality,
edge_betweenness_centrality,
)
from nx_parallel.algorithms.shortest_paths.generic import all_pairs_all_shortest_paths
from nx_parallel.algorithms.shortest_paths.weighted import (
all_pairs_dijkstra,
all_pairs_dijkstra_path_length,
all_pairs_dijkstra_path,
all_pairs_bellman_ford_path_length,
all_pairs_bellman_ford_path,
johnson,
)
from nx_parallel.algorithms.shortest_paths.unweighted import (
all_pairs_shortest_path,
all_pairs_shortest_path_length,
)
from nx_parallel.algorithms.efficiency_measures import local_efficiency
from nx_parallel.algorithms.isolate import number_of_isolates
from nx_parallel.algorithms.tournament import (
is_reachable,
tournament_is_strongly_connected,
)
from nx_parallel.algorithms.vitality import closeness_vitality
from nx_parallel.algorithms.approximation.connectivity import (
approximate_all_pairs_node_connectivity,
)
from nx_parallel.algorithms.connectivity import connectivity
from nx_parallel.algorithms.cluster import square_clustering
from operator import attrgetter
import networkx as nx
from nx_parallel import algorithms

__all__ = ["BackendInterface", "ParallelGraph"]


ALGORITHMS = [
# Bipartite
"node_redundancy",
# Isolates
"number_of_isolates",
# Vitality
"closeness_vitality",
# Tournament
"is_reachable",
"tournament_is_strongly_connected",
# Centrality
"betweenness_centrality",
"edge_betweenness_centrality",
# Efficiency
"local_efficiency",
# Shortest Paths : generic
"all_pairs_all_shortest_paths",
# Shortest Paths : weighted graphs
"all_pairs_dijkstra",
"all_pairs_dijkstra_path_length",
"all_pairs_dijkstra_path",
"all_pairs_bellman_ford_path_length",
"all_pairs_bellman_ford_path",
"johnson",
# Clustering
"square_clustering",
# Shortest Paths : unweighted graphs
"all_pairs_shortest_path",
"all_pairs_shortest_path_length",
# Approximation
"approximate_all_pairs_node_connectivity",
# Connectivity
"connectivity.all_pairs_node_connectivity",
]


class ParallelGraph:
"""A wrapper class for networkx.Graph, networkx.DiGraph, networkx.MultiGraph,
and networkx.MultiDiGraph."""
and networkx.MultiDiGraph.
"""

__networkx_backend__ = "parallel"

def __init__(self, graph_object=None):
if graph_object is None:
self.graph_object = nx.Graph()
elif isinstance(
graph_object, (nx.Graph, nx.DiGraph, nx.MultiGraph, nx.MultiDiGraph)
):
elif isinstance(graph_object, nx.Graph):
self.graph_object = graph_object
else:
self.graph_object = nx.Graph(graph_object)
@@ -56,70 +63,36 @@ def is_directed(self):
return self.graph_object.is_directed()

def __str__(self):
return "Parallel" + str(self.graph_object)


class BackendInterface:
"""BackendInterface class for parallel algorithms."""
return f"Parallel{self.graph_object}"

# Bipartite
node_redundancy = node_redundancy

# Isolates
number_of_isolates = number_of_isolates
def assign_algorithms(cls):
"""Class decorator to assign algorithms to the class attributes."""
for attr in ALGORITHMS:
# get the function name by parsing the module hierarchy
func_name = attr.rsplit(".", 1)[-1]
setattr(cls, func_name, attrgetter(attr)(algorithms))
return cls

# Vitality
closeness_vitality = closeness_vitality

# Tournament
is_reachable = is_reachable
tournament_is_strongly_connected = tournament_is_strongly_connected

# Centrality
betweenness_centrality = betweenness_centrality
edge_betweenness_centrality = edge_betweenness_centrality

# Efficiency
local_efficiency = local_efficiency

# Shortest Paths : generic
all_pairs_all_shortest_paths = all_pairs_all_shortest_paths

# Shortest Paths : weighted graphs
all_pairs_dijkstra = all_pairs_dijkstra
all_pairs_dijkstra_path_length = all_pairs_dijkstra_path_length
all_pairs_dijkstra_path = all_pairs_dijkstra_path
all_pairs_bellman_ford_path_length = all_pairs_bellman_ford_path_length
all_pairs_bellman_ford_path = all_pairs_bellman_ford_path
johnson = johnson

# Clustering
square_clustering = square_clustering

# Shortest Paths : unweighted graphs
all_pairs_shortest_path = all_pairs_shortest_path
all_pairs_shortest_path_length = all_pairs_shortest_path_length

# Approximation
approximate_all_pairs_node_connectivity = approximate_all_pairs_node_connectivity

# Connectivity
all_pairs_node_connectivity = connectivity.all_pairs_node_connectivity

# =============================
@assign_algorithms
class BackendInterface:
"""BackendInterface class for parallel algorithms."""

@staticmethod
def convert_from_nx(graph, *args, **kwargs):
"""Convert a networkx.Graph, networkx.DiGraph, networkx.MultiGraph,
or networkx.MultiDiGraph to a ParallelGraph."""
or networkx.MultiDiGraph to a ParallelGraph.
"""
if isinstance(graph, ParallelGraph):
return graph
return ParallelGraph(graph)

@staticmethod
def convert_to_nx(result, *, name=None):
"""Convert a ParallelGraph to a networkx.Graph, networkx.DiGraph,
networkx.MultiGraph, or networkx.MultiDiGraph."""
networkx.MultiGraph, or networkx.MultiDiGraph.
"""
if isinstance(result, ParallelGraph):
return result.graph_object
return result
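The assign_algorithms decorator above replaces the long block of explicit "name = name" assignments on BackendInterface. A rough, self-contained sketch of the dispatch pattern, using SimpleNamespace stand-ins and dummy lambdas instead of the real nx_parallel.algorithms package:

from operator import attrgetter
from types import SimpleNamespace

# Stand-ins for nx_parallel.algorithms and its connectivity submodule (illustrative only).
connectivity = SimpleNamespace(all_pairs_node_connectivity=lambda G: "node connectivity")
algorithms = SimpleNamespace(
    square_clustering=lambda G: "square clustering",
    connectivity=connectivity,
)

ALGORITHMS = ["square_clustering", "connectivity.all_pairs_node_connectivity"]


def assign_algorithms(cls):
    """Attach each algorithm to the class under its bare function name."""
    for attr in ALGORITHMS:
        func_name = attr.rsplit(".", 1)[-1]  # drop any submodule prefix
        setattr(cls, func_name, attrgetter(attr)(algorithms))
    return cls


@assign_algorithms
class BackendInterface:
    pass


print(BackendInterface.square_clustering(None))            # square clustering
print(BackendInterface.all_pairs_node_connectivity(None))  # node connectivity

The dotted entry "connectivity.all_pairs_node_connectivity" is why attrgetter is used instead of getattr: it resolves the nested attribute path while the class attribute keeps only the bare name.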
5 changes: 3 additions & 2 deletions nx_parallel/tests/test_get_chunks.py
@@ -2,11 +2,12 @@

import inspect
import importlib
import networkx as nx
import nx_parallel as nxp
import random
import types
import math
import networkx as nx

import nx_parallel as nxp


def get_all_functions(package_name="nx_parallel"):
95 changes: 62 additions & 33 deletions nx_parallel/utils/chunk.py
@@ -2,65 +2,94 @@
import os
import networkx as nx


__all__ = ["chunks", "get_n_jobs", "create_iterables"]


def chunks(iterable, n):
"""Divides an iterable into chunks of size n"""
def chunks(iterable, n_chunks):
"""Yield exactly `n_chunks` chunks from `iterable`, balancing the chunk sizes."""
iterable = list(iterable)
k, m = divmod(len(iterable), n_chunks)
it = iter(iterable)
while True:
x = tuple(itertools.islice(it, n))
if not x:
return
yield x
for _ in range(n_chunks):
chunk_size = k + (1 if m > 0 else 0)
m -= 1
yield tuple(itertools.islice(it, chunk_size))


def get_n_jobs(n_jobs=None):
"""Returns the positive value of `n_jobs`."""
"""Get the positive value of `n_jobs`
Returns the positive value of `n_jobs` by either extracting it from the
active configuration system or modifying the passed-in value, similar to
joblib's behavior.
- If running under pytest, it returns 2 jobs.
- If the `active` configuration in NetworkX's config is `True`, `n_jobs`
is extracted from the NetworkX config.
- Otherwise, `n_jobs` is obtained from joblib's active backend.
- `ValueError` is raised if `n_jobs` is 0.
"""
if "PYTEST_CURRENT_TEST" in os.environ:
return 2
else:

if n_jobs is None:
if nx.config.backends.parallel.active:
n_jobs = nx.config.backends.parallel.n_jobs
else:
from joblib.parallel import get_active_backend

n_jobs = get_active_backend()[1]
n_cpus = os.cpu_count()
if n_jobs is None:
return 1
if n_jobs < 0:
return n_cpus + n_jobs + 1
if n_jobs == 0:
raise ValueError("n_jobs == 0 in Parallel has no meaning")
return int(n_jobs)

if n_jobs is None:
return 1
if n_jobs < 0:
return os.cpu_count() + n_jobs + 1

if n_jobs == 0:
raise ValueError("n_jobs == 0 in Parallel has no meaning")

return int(n_jobs)


def create_iterables(G, iterator, n_cores, list_of_iterator=None):
"""Creates an iterable of function inputs for parallel computation
"""Create an iterable of function inputs for parallel computation
based on the provided iterator type.
Parameters
-----------
----------
G : NetworkX graph
The NetworkX graph.
iterator : str
Type of iterator. Valid values are 'node', 'edge', 'isolate'
n_cores : int
The number of cores to use.
list_of_iterator : list, optional
A precomputed list of items to iterate over. If None, it will
be generated based on the iterator type.
Returns:
--------
Returns
-------
iterable : Iterable
An iterable of function inputs.
Raises
------
ValueError
If the iterator type is not one of "node", "edge" or "isolate".
"""

if iterator in ["node", "edge", "isolate"]:
if list_of_iterator is None:
if iterator == "node":
list_of_iterator = list(G.nodes)
elif iterator == "edge":
list_of_iterator = list(G.edges)
elif iterator == "isolate":
list_of_iterator = list(nx.isolates(G))
num_in_chunk = max(len(list_of_iterator) // n_cores, 1)
return chunks(list_of_iterator, num_in_chunk)
else:
raise ValueError("Invalid iterator type.")
if not list_of_iterator:
if iterator == "node":
list_of_iterator = list(G.nodes)
elif iterator == "edge":
list_of_iterator = list(G.edges)
elif iterator == "isolate":
list_of_iterator = list(nx.isolates(G))
else:
raise ValueError(f"Invalid iterator type: {iterator}")

if not list_of_iterator:
return iter([])

return chunks(list_of_iterator, n_cores)
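For reference, a small self-contained sketch contrasting the old and new chunking behavior; the two function bodies mirror the before/after code in the diff above, and the outputs in the comments are what these sketches print:

import itertools


def chunks_old(iterable, n):
    # Old behavior: fixed-size chunks of n items; the last chunk may be smaller.
    it = iter(iterable)
    while True:
        x = tuple(itertools.islice(it, n))
        if not x:
            return
        yield x


def chunks_new(iterable, n_chunks):
    # New behavior: exactly n_chunks chunks whose sizes differ by at most one.
    iterable = list(iterable)
    k, m = divmod(len(iterable), n_chunks)
    it = iter(iterable)
    for _ in range(n_chunks):
        chunk_size = k + (1 if m > 0 else 0)
        m -= 1
        yield tuple(itertools.islice(it, chunk_size))


print(list(chunks_old(range(10), 4)))  # [(0, 1, 2, 3), (4, 5, 6, 7), (8, 9)]
print(list(chunks_new(range(10), 4)))  # [(0, 1, 2), (3, 4, 5), (6, 7), (8, 9)]

This is also why create_iterables now passes n_cores straight through instead of computing num_in_chunk: the caller asks for a chunk count rather than a chunk size.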