Skip to content

Commit

Permalink
added get_chunks and updated heatmap (improved speedups), updated docs
Browse files Browse the repository at this point in the history
  • Loading branch information
Schefflera-Arboricola committed Feb 29, 2024
1 parent 3ad8068 commit 94de49a
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 56 deletions.
61 changes: 20 additions & 41 deletions nx_parallel/algorithms/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,51 +2,26 @@
from joblib import Parallel, delayed
import nx_parallel as nxp

__all__ = ["square_clustering_no_chunk", "square_clustering_chunk"]
__all__ = [
"square_clustering",
]


def square_clustering_no_chunk(G, nodes=None):
"""The squares clustering coefficient for nodes for all nodes
are computed in parallel over all available CPU cores.
def square_clustering(G, nodes=None, get_chunks="chunks"):
"""The nodes are chunked into `node_chunks` and then the square clustering
coefficient for all `node_chunks` are computed in parallel over all available
CPU cores.
Parameters
------------
get_chunks : str, function (default = "chunks")
A function that takes in a list of all the nodes (or nbunch) as input and
returns an iterable `node_chunks`. The default chunking is done by slicing the
`nodes` into `n` chunks, where `n` is the number of CPU cores.
networkx.square_clustering: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.cluster.square_clustering.html
"""

def _compute_clustering(v):
clustering = 0
potential = 0
for u, w in combinations(G[v], 2):
squares = len((set(G[u]) & set(G[w])) - {v})
clustering += squares
degm = squares + 1
if w in G[u]:
degm += 1
potential += (len(G[u]) - degm) + (len(G[w]) - degm) + squares
if potential > 0:
clustering /= potential
return (v, clustering)

if hasattr(G, "graph_object"):
G = G.graph_object

if nodes is None:
node_iter = G
else:
node_iter = G.nbunch_iter(nodes)

total_cores = nxp.cpu_count()

result = Parallel(n_jobs=total_cores)(
delayed(_compute_clustering)(v) for v in node_iter
)
clustering = dict(result)

if nodes in G:
return clustering[nodes]
return clustering


def square_clustering_chunk(G, nodes=None):
def _compute_clustering_chunk(node_iter_chunk):
result_chunk = []
for v in node_iter_chunk:
Expand All @@ -73,8 +48,12 @@ def _compute_clustering_chunk(node_iter_chunk):
node_iter = list(G.nbunch_iter(nodes))

total_cores = nxp.cpu_count()
num_in_chunk = max(min(len(node_iter) // total_cores, 10), 1)
node_iter_chunks = nxp.chunks(node_iter, num_in_chunk)

if get_chunks == "chunks":
num_in_chunk = max(len(node_iter) // total_cores, 1)
node_iter_chunks = nxp.chunks(node_iter, num_in_chunk)
else:
node_iter_chunks = get_chunks(node_iter)

result = Parallel(n_jobs=total_cores)(
delayed(_compute_clustering_chunk)(node_iter_chunk)
Expand Down
4 changes: 2 additions & 2 deletions nx_parallel/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
tournament_is_strongly_connected,
)
from nx_parallel.algorithms.vitality import closeness_vitality
# from nx_parallel.algorithms.cluster import square_clustering
from nx_parallel.algorithms.cluster import square_clustering

__all__ = ["Dispatcher", "ParallelGraph"]

Expand Down Expand Up @@ -51,7 +51,7 @@ class Dispatcher:
all_pairs_bellman_ford_path = all_pairs_bellman_ford_path

# Clustering
# square_clustering = square_clustering
square_clustering = square_clustering

# =============================

Expand Down
Binary file removed timing/heatmap_square_clustering_chunk_timing.png
Binary file not shown.
Binary file modified timing/heatmap_square_clustering_timing.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
24 changes: 11 additions & 13 deletions timing/timing_individual_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
number_of_nodes_list = [10, 50, 100, 300, 500]
pList = [1, 0.8, 0.6, 0.4, 0.2]
weighted = False
currFun = nxp.square_clustering_chunk
currFun = nx.square_clustering
for p in pList:
for num in number_of_nodes_list:
# create original and parallel graphs
Expand All @@ -30,21 +30,19 @@

# time both versions and update heatmapDF
t1 = time.time()
c1 = nxp.square_clustering_chunk(H)
if isinstance(c1, types.GeneratorType):
d = dict(c1)
c = nx.square_clustering(H)
if isinstance(c, types.GeneratorType):
d = dict(c)
t2 = time.time()
newTime = t2 - t1
parallelTime = t2 - t1
t1 = time.time()
c2 = nxp.square_clustering_no_chunk(H)
if isinstance(c2, types.GeneratorType):
d = dict(c2)
c = nx.square_clustering(G)
if isinstance(c, types.GeneratorType):
d = dict(c)
t2 = time.time()
oldTime = t2 - t1
timesFaster = oldTime / newTime
stdTime = t2 - t1
timesFaster = stdTime / parallelTime
heatmapDF.at[num, p] = timesFaster
print(num, p, timesFaster)
print(c1 == c2)
print("Finished " + str(currFun))

# Code to create for row of heatmap specifically for tournaments
Expand Down Expand Up @@ -77,7 +75,7 @@
plt.xticks(rotation=45)
plt.yticks(rotation=20)
plt.title(
"Small Scale Demo: Times Speedups of " + currFun.__name__ + " - chunk vs no chunk"
"Small Scale Demo: Times Speedups of " + currFun.__name__ + " compared to NetworkX"
)
plt.xlabel("Number of Vertices")
plt.ylabel("Edge Probability")
Expand Down

0 comments on commit 94de49a

Please sign in to comment.