# -*- coding: utf-8 -*-
# Copyright (C) 2017
# All rights reserved.
# BSD license.
"""Asynchronous fluid communities algorithm for community detection."""
from collections import Counter
import random
__all__ = ['asyn_fluidc']

# Optionally fix the random seed for reproducibility
# random.seed(123)


def asyn_fluidc(G, k, max_iter=100):
"""
Fluid Communities: A Competitive and Highly Scalable Community Detection Algorithm.
Args:
- G: Graph to run the algorithm into.
+ type: networkx.Graph
- k: Number of communities to search.
+ type: int
- max_iter: Number of maximum iterations allowed.
+ type: int
Return:
- List of communities, where each community is a list of vertex ID.
Each vertex ID can be either an int or str.
+ type: list(list(int or str))
"""
    # Initialization
    max_density = 1.0
    vertices = list(G)
    random.shuffle(vertices)
    communities = {n: i for i, n in enumerate(vertices[:k])}
    density = {}
    com_to_numvertices = {}
    for vertex in communities.keys():
        com_to_numvertices[communities[vertex]] = 1
        density[communities[vertex]] = max_density
    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    while cont:
        cont = False
        iter_count += 1
        # Loop over all vertices in the graph in a random order
        vertices = list(G)
        random.shuffle(vertices)
        for vertex in vertices:
            # Updating rule
            com_counter = Counter()
            # Take the vertex's own community into account
            try:
                com_counter.update({communities[vertex]:
                                    density[communities[vertex]]})
            except KeyError:
                pass
            # Gather the communities of neighbouring vertices
            for v in G[vertex]:
                try:
                    com_counter.update({communities[v]:
                                        density[communities[v]]})
                except KeyError:
                    continue
            # Check which community has the highest density
            new_com = -1
            if com_counter:
                max_freq = max(com_counter.values())
                best_communities = [com for com, freq in com_counter.items()
                                    if (max_freq - freq) < 0.0001]
                # If the vertex's current community is among the best, keep it
                try:
                    if communities[vertex] in best_communities:
                        new_com = communities[vertex]
                except KeyError:
                    pass
                # If the vertex changes community...
                if new_com == -1:
                    # Set the non-convergence flag
                    cont = True
                    # Randomly choose a new community from the candidates
                    new_com = random.choice(best_communities)
                    # Update the status of the previous community
                    try:
                        com_to_numvertices[communities[vertex]] -= 1
                        density[communities[vertex]] = max_density / \
                            com_to_numvertices[communities[vertex]]
                    except KeyError:
                        pass
                    # Update the status of the new community
                    communities[vertex] = new_com
                    com_to_numvertices[communities[vertex]] += 1
                    density[communities[vertex]] = max_density / \
                        com_to_numvertices[communities[vertex]]
        # If the maximum number of iterations is reached, output current results
        if iter_count > max_iter:
            print('Exiting by max iterations!')
            break
    # Return results by grouping communities as lists of vertices
    return list(_invert_dict(communities).values())


def _invert_dict(orig_dict):
    """
    Invert a Python dictionary's keys and values: many-to-one --> one-to-many.

    Args:
        - orig_dict: Dictionary to invert.
          + type: dict

    Return:
        - Inverted dictionary
          + type: dict
    """
    return_dict = {}
    for v, k in orig_dict.items():
        try:
            return_dict[k].append(v)
        except KeyError:
            return_dict[k] = [v]
    return return_dict
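

# A minimal usage sketch, not part of the original module: it assumes NetworkX
# is installed, and the karate-club graph and k=2 are illustrative choices
# only, not values prescribed by this file.
if __name__ == '__main__':
    import networkx as nx

    demo_graph = nx.karate_club_graph()          # small built-in test graph
    demo_communities = asyn_fluidc(demo_graph, k=2, max_iter=100)
    for i, com in enumerate(demo_communities):
        print('Community %d: %s' % (i, sorted(com)))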