-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathqopin_tb.py
executable file
·332 lines (270 loc) · 13.4 KB
/
qopin_tb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import pdb
import copy
import random
import sys
from boltzmann_explr_opin import boltzmann_explr_opin
from compute_prob import compute_prob
from gen_graph import gen_graph
from prune_feed import prune_feed
from normalize_vector import normalize_vector
from load_graph import load_graph
from visualize_graph import visualize_graph
from display_centralities import display_centralities
from save_results_qopin import save_results_qopin
def qopin_tb(sim, Params):
    """Run one simulation of two competing messages spreading over a graph.

    Two source nodes inject messages every round: ``srcQNode`` sends message
    ``1`` (the "Q" message) and ``srcRNode`` sends message ``0`` (the "R"
    message).  Every other node that has a non-empty feed may pick one feed
    entry and forward it to a neighbour chosen by ``boltzmann_explr_opin``.
    Delivering a message increments the recipient's counter in ``blfMat``
    (column 1 for Q, column 0 for R).  When ``Params['qLrnEn']`` is true, the
    Q-table of a node forwarding the Q message is updated after each send.

    Args:
        sim: Mapping; ``sim['iterNum']`` is used as the seed for ``random``
            and ``sim`` is passed through to ``save_results_qopin``.
        Params: Mapping of simulation settings.  Keys read here:
            ``blfBatchSize``, ``gammaVal``, ``genGraphFlg``, ``graphName``,
            ``tauMax``, ``learnRate``, ``srcQNode``, ``srcRNode``,
            ``numNodes``, ``numRounds``, ``eta``, ``xi``, ``tempVal``,
            ``qLrnEn`` and ``saveSimresFlg``.  It is also forwarded to the
            graph, plotting and saving helpers.

    Returns:
        None.  Side effects: seeds ``random``, prints progress to stdout,
        optionally saves results through ``save_results_qopin`` and shows
        matplotlib figures 2, 3 and 4.
    """
    # ------------------------------ set-up ------------------------------
    seed = sim['iterNum']
    # NOTE(review): only the stdlib generator is seeded.  The np.random draws
    # below (np.random.rand / np.random.choice) are not seeded in this
    # function - confirm whether the caller seeds NumPy for reproducibility.
    random.seed(seed)
    rememberingFactor = 1  # multiplier applied to a belief count before adding 1
    blfBatchSize = Params['blfBatchSize']
    gammVal = Params['gammaVal']

    if Params['genGraphFlg'] == True:
        (g, numNbrDict, gnx) = gen_graph(Params)
    else:
        (g, numNbrDict, gnx) = load_graph(Params)

    if Params['graphName'] == 'rgg' or Params['graphName'] == 'grid2d':
        pos = nx.get_node_attributes(gnx, 'pos')
    else:
        pos = nx.spectral_layout(gnx, scale=4)

    tauMax = Params['tauMax']
    learnRate = Params['learnRate']
    srcQNode = Params['srcQNode']
    srcRNode = Params['srcRNode']
    numNodes = Params['numNodes']

    # Display centralities and degrees of the nodes
    [cntraltyEigVec] = display_centralities(gnx, srcQNode, srcRNode)

    # Q-table: one action value per neighbour of each node, initialised to 0.5.
    # g[indNode] is used as the neighbour list of indNode throughout.
    Q = copy.deepcopy(g)
    for indNode in range(numNodes):
        Q[indNode] = [0.5]*len(g[indNode])

    opinionList = []
    opinionListOppon = []
    # Per node: [[Q or R message], [time of reception], [incoming node],
    #            [No. of times msg is Tx]]
    feedList = {x: [[], [], [], []] for x in range(numNodes)}
    blfMat = np.ones((numNodes, 2))
    # Per-node opinion shares.  Initialised from the prior so the histograms
    # at the end are defined even when the round loop never executes.
    muiList = np.divide(blfMat[:, 1], np.sum(blfMat, 1))
    muiOpponList = np.divide(blfMat[:, 0], np.sum(blfMat, 1))
    blfMatAllRnd = []

    # --------------------------- loop for all rounds ---------------------------
    for indRnd in range(Params['numRounds']):
        if np.mod(indRnd, 10) == 0:
            print("Round", indRnd, end=",")
            sys.stdout.flush()

        # Beliefs, feeds and the R-message loop only advance on batch rounds.
        isBatchRnd = np.mod(indRnd, blfBatchSize) == 0
        batchIdx = int(indRnd/blfBatchSize)  # time stamp used for feed entries

        qTxNodesList = []
        rTxNodesList = []
        qChosenFeedList = []
        rChosenFeedList = []
        # Feed indices that were forwarded this round.  They are only
        # collected; nothing in this function removes them from feedList.
        feedDelDict = {x: [] for x in range(numNodes)}
        # Probabilities and rewards in this round use the beliefs as they
        # were at the start of the round.
        blfMatPrev = blfMat.copy()

        # Phase 1: decide, per node, which message (if any) it holds to send.
        for indNode in range(numNodes):
            # 1. Remove all the messages older than tauMax [TODO]
            feedList = prune_feed(feedList, indNode, batchIdx, tauMax)
            # 2. Source nodes always transmit their own message.
            if indNode == srcQNode:
                qTxNodesList.append(indNode)
                qChosenFeedList.append(0)
            elif indNode == srcRNode:
                rTxNodesList.append(indNode)
                rChosenFeedList.append(0)
            elif len(feedList[indNode][0]) != 0:
                # Selection probabilities over the feed entries come from
                # compute_prob (reception times, Tx counts, eta, xi).
                [probFeedVec, noTx] = compute_prob(feedList[indNode][1], feedList[indNode][3], batchIdx, Params['eta'], Params['xi'])
                if noTx == False:
                    if len(probFeedVec) == 1:
                        # Single entry: no random draw is consumed.
                        chosenFeedTmp = [0]
                    else:
                        chosenFeedTmp = np.random.choice(len(probFeedVec), 1, p=probFeedVec)
                    chosenFeed = chosenFeedTmp[0]
                    chosenMsg = feedList[indNode][0][chosenFeed]
                    # Collect qTx nodes or collect rTx nodes
                    if chosenMsg == 1:
                        qTxNodesList.append(indNode)
                        qChosenFeedList.append(chosenFeed)
                    else:
                        rTxNodesList.append(indNode)
                        rChosenFeedList.append(chosenFeed)

        # Phase 2: nodes holding the Q message (runs every round).
        for (loopInd, indNode) in enumerate(qTxNodesList):
            # Probability of transmitting m_q = share of Q counts in the belief.
            probSendMsgBlf = blfMatPrev[indNode][1]/np.sum(blfMatPrev[indNode])
            # The draw is made for every node (source included) so the random
            # stream is consumed exactly as before; the source always sends.
            sendMsgSamp = bool(np.random.rand() <= probSendMsgBlf)
            if indNode == srcQNode:
                sendMsgSamp = True
            if not sendMsgSamp:
                continue

            # 4.a. Choose a neighbour using the Boltzmann exploration rule.
            if indNode == srcQNode:
                incomingNodeIndex = []
            else:
                chosenFeed = qChosenFeedList[loopInd]  # location of the feed
                incomingNode = feedList[indNode][2][chosenFeed]  # sender of that feed entry
                incomingNodeIndex = np.where(np.array(g[indNode]) == incomingNode)[0][0]
                if isBatchRnd:
                    feedList[indNode][3][chosenFeed] += 1
            seedInBoltzmann = random.randint(1, 100000)
            [action, actionIdx] = boltzmann_explr_opin(Q[indNode], incomingNodeIndex, g[indNode], Params['tempVal'], seedInBoltzmann)

            # 4.b. Update belief of the chosen recipient (batch rounds only).
            if isBatchRnd:
                blfMat[action][1] = blfMat[action][1]*rememberingFactor + 1

            # 4.c. Compute reward and update the Q-table.
            if Params['qLrnEn'] == True:
                opinion = blfMatPrev[action][1]/np.sum(blfMatPrev[action])
                rwdImm = opinion*(1-opinion)/blfMatPrev[action][1]
                Qmax = np.max(Q[action])
                Q[indNode][actionIdx] = (1-learnRate)*Q[indNode][actionIdx]+learnRate*(rwdImm+gammVal*Qmax)

            if isBatchRnd:
                # 4.e. Record the forwarded feed entry of indNode.
                if indNode != srcQNode:
                    feedDelDict[indNode].append(chosenFeed)
                # 4.f. Append the message to the feed of the action node.
                # NOTE(review): the source indentation was lost; this append
                # is assumed to belong to the batch-round branch, mirroring
                # the R loop which only runs on batch rounds - confirm.
                feedList[action][0].append(1)         # message
                feedList[action][1].append(batchIdx)  # time
                feedList[action][2].append(indNode)   # incoming node
                feedList[action][3].append(0)         # Number of times msg is Tx

        # Phase 3: nodes holding the R message (batch rounds only).
        if isBatchRnd:
            for (loopInd, indNode) in enumerate(rTxNodesList):
                # Probability of transmitting m_r = share of R counts in the belief.
                probSendMsgBlf = blfMatPrev[indNode][0]/np.sum(blfMatPrev[indNode])
                sendMsgSamp = bool(np.random.rand() <= probSendMsgBlf)
                if indNode == srcRNode:
                    sendMsgSamp = True
                if not sendMsgSamp:
                    continue

                if indNode == srcRNode:
                    incomingNodeIndex = []
                else:
                    chosenFeed = rChosenFeedList[loopInd]  # location of the feed
                    incomingNode = feedList[indNode][2][chosenFeed]
                    incomingNodeIndex = np.where(np.array(g[indNode]) == incomingNode)[0][0]
                    feedList[indNode][3][chosenFeed] += 1
                seedInBoltzmann = random.randint(1, 100000)
                # R senders pass equal action values (all ones) instead of a
                # learned Q-table.
                [action, actionIdx] = boltzmann_explr_opin([1]*numNbrDict[indNode], incomingNodeIndex, g[indNode], Params['tempVal'], seedInBoltzmann)
                blfMat[action][0] = blfMat[action][0]*rememberingFactor + 1
                if indNode != srcRNode:
                    feedDelDict[indNode].append(chosenFeed)
                feedList[action][0].append(0)         # message
                feedList[action][1].append(batchIdx)  # time
                feedList[action][2].append(indNode)   # incoming node
                feedList[action][3].append(0)         # Number of times msg is Tx

        # Phase 4: record the opinion statistics for this batch round.
        if isBatchRnd:
            muiList = np.divide(blfMat[:, 1], np.sum(blfMat, 1))
            opinionList.append(np.sum(muiList))
            muiOpponList = np.divide(blfMat[:, 0], np.sum(blfMat, 1))
            opinionListOppon.append(np.sum(muiOpponList))
            # Store a copy: blfMat is mutated in place on later rounds, so
            # appending the array itself would make every stored entry
            # alias the final matrix.
            # NOTE(review): placement inside the batch-round branch is
            # assumed (indentation was lost in the source) - confirm.
            blfMatAllRnd.append(blfMat.copy())

    # ------------------------------ outputs ------------------------------
    if Params['saveSimresFlg'] == True:
        dumpVarNames = ['opinionList', 'opinionListOppon', 'blfMatAllRnd', 'cntraltyEigVec']
        dumpVarVals = [opinionList, opinionListOppon, blfMatAllRnd, cntraltyEigVec]
        save_results_qopin(sim, Params, dumpVarNames, dumpVarVals)

    def _finishFigure(yLabel, xLabel):
        # Shared labelling/styling for figures 2 and 3, then display.
        plt.ylabel(yLabel, fontsize=24)
        plt.xlabel(xLabel, fontsize=24)
        if Params['qLrnEn'] == True:
            plt.title('with Q-learning', fontsize=24)
        else:
            plt.title('without Q-learning', fontsize=24)
        # NOTE(review): assumed to run for both branches (indentation lost).
        print(" ")
        plt.grid(True)
        plt.xticks(fontsize=24)
        plt.yticks(fontsize=24)
        plt.legend(fontsize=20)
        plt.show()
        plt.show(block=False)

    # Figure 2: summed opinions per recorded batch round.
    plt.figure(2)
    plt.plot(opinionList, linewidth=4, markersize=10, label='opinion-1')
    plt.plot(opinionListOppon, linewidth=4, markersize=10, label='opinion-2')
    _finishFigure("Sum opinions", "Round")

    # Figure 3: histogram of the most recently computed per-node opinions.
    plt.figure(3)
    plt.hist(muiList, bins=10, label='opinion-1')
    plt.hist(muiOpponList, bins=10, label='opinion-2')
    _finishFigure("Frequency", "Opinion")

    # Figure 4: graph drawn with the final beliefs.
    plt.figure(4)
    visualize_graph(gnx, g, blfMat, pos, Params)
    plt.show()
    plt.show(block=False)