# performance.py
from datetime import datetime
from backend.educationalRepository.databaseInterfaces.mongoDB_interface import *
from backend.educationalRepository.databaseInterfaces.drive_api import *
from backend.educationalRepository.repositoryAPI.Caching.cache_impl import *
import random
import time
import collections
import matplotlib.pyplot as plt
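
# The star-imports above provide the MongoDB helpers (getNextSequenceValue,
# saveSingleDocument, updateDocument, findSingleDocument, findAllDocument,
# saveMultipleDocuments, createDatabase, deleteDatabase), the Drive helpers
# (create_folder, remove_folder), and CacheImpl. Their implementations are not
# shown in this file; for orientation, the Mongo helpers behave like thin
# pymongo wrappers along these lines (a sketch of the assumed interface, not
# the real module):
#
#     from pymongo import MongoClient
#     _client = MongoClient()
#
#     def createDatabase(db):
#         return _client[db]
#
#     def deleteDatabase(db):
#         _client.drop_database(db)
#
#     def saveSingleDocument(db, coll, doc):
#         _client[db][coll].insert_one(doc)
#
#     def saveMultipleDocuments(db, coll, docs):
#         _client[db][coll].insert_many(docs)
#
#     def findSingleDocument(db, coll, query):
#         return _client[db][coll].find_one(query)
#
#     def findAllDocument(db, coll, query):
#         return list(_client[db][coll].find(query))
#
#     def updateDocument(db, coll, query, update):
#         _client[db][coll].update_one(query, update)
#
#     def getNextSequenceValue(db, coll):
#         # e.g. an atomic $inc on a counters document
#         ...
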
def insert_tag(tag_name, parents, id=None):
    """
    Inserts a tag in the tag_tree collection and registers the tag ID in the
    main_tree collection under its last parent.

    Parameters:
        tag_name (str): The name of the tag.
        parents (list): The list of parent tag IDs, root first.
        id (str): Optional explicit tag ID; autogenerated when None.

    Returns:
        tag_id (str): The ID of the inserted tag.
    """
    if id is None:
        tag_id = 't' + str(getNextSequenceValue("test_db", "tagtree_collection"))
    else:
        tag_id = id
    tag_doc = {
        "id": tag_id,
        "name": tag_name,
        "path_to_tag": parents,
    }
    saveSingleDocument("test_db", "tagtree_collection", tag_doc)
    updateDocument("test_db", "maintree_collection", {"id": parents[-1]},
                   {"$push": {"children_tags": tag_id}})
    # Drive folder creation is stubbed out for benchmarking:
    # parent_folder_id = findSingleDocument("test_db","maintree_collection",{"id":parents[-1]})["drive_id"]
    # file = create_folder(tag_name, [parent_folder_id])
    main_tree_node_doc = {
        "id": tag_id,
        "name": tag_name,
        "type": "tag",
        "children_tags": [],
        "children_posts": [],
        'drive_id': 'dfga',  # placeholder; would be file['id'] with Drive enabled
    }
    saveSingleDocument("test_db", "maintree_collection", main_tree_node_doc)
    return tag_id
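
# Example usage (illustrative; assumes the root tag 't0' created by
# generate_random_tag_tree already exists in maintree_collection):
#
#     child_id = insert_tag('linear_algebra', ['t0'])
#     insert_tag('eigenvalues', ['t0', child_id])
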
def delete_random_elements(input_list, n):
    """Sample n distinct elements from input_list for the caller to remove."""
    return set(random.sample(input_list, n))
def generate_random_tag_tree(n):
    """Builds a random tag tree of n tags under a root 'tag0' by BFS,
    attaching a random subset of the remaining nodes to each dequeued node."""
    path_dict.clear()  # reset paths left over from a previous run
    path_dict[0] = [0]
    queue = [0]
    nodes = list(range(1, n + 1))
    file = create_folder('tag0', None)
    main_tree_node_doc = {
        "id": 't0',
        "name": 'tag0',
        "type": 'tag',
        'children_tags': [],
        'children_posts': [],
        'drive_id': file['id'],
    }
    global root_id_drive
    root_id_drive = file['id']
    saveSingleDocument("test_db", "maintree_collection", main_tree_node_doc)
    while nodes and queue:
        # Give the front of the queue up to len(nodes) children, thinned by a
        # random divisor so the tree is not too bushy.
        r = random.randint(1, len(nodes))
        div_fact = random.choice([1, 2, 3, 4])
        children = delete_random_elements(nodes, r // div_fact)
        nodes = [i for i in nodes if i not in children]
        q_front = queue.pop(0)
        queue.extend(children)
        for i in children:
            path_dict[i] = list(path_dict[q_front]) + [i]
            ids = ['t' + str(j) for j in path_dict[i][:-1]]
            insert_tag('tag' + str(i), ids, id='t' + str(i))
def print_tag():
    tag_tree = findAllDocument("test_db", "tagtree_collection", {})
    for node in tag_tree:
        print(node['id'], "--->", node["path_to_tag"])
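
# Illustrative run (the tree is random, so only the output shape is fixed):
#
#     generate_random_tag_tree(5)
#     print_tag()
#     # t3 ---> ['t0']
#     # t5 ---> ['t0', 't3']
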
def create_posts(n):
    posts = []
    for i in range(n):
        # Pick a random node's path; the post is tagged with the node numbers
        # of its ancestors (the node itself is excluded).
        path = random.choice(list(path_dict.values()))
        tags = path[:-1]
        posts.append({
            'id': 'p' + str(i),
            'type': 'text',
            'time': '2020-01-01T00:00:00Z',
            'tags': tags,
            'caption': 'P' + str(i) + 'Caption',
            'text': 'This is a test post',
            'author': 'u' + str(random.randint(1, max(1, n // 2))),
            'upvotes': random.randint(1, 100),
            'image_url': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTKboCkjXUKztIj7P8a5UjeFn0lAMQSp_TqhQ&usqp=CAU',
            'is_answered': bool(random.getrandbits(1)),
            'is_approved': bool(random.getrandbits(1)),
            'comments': ['c13', 'c66'],
            'reports': 2
        })
        cache.addItem_recent_cache(posts[-1]['id'], datetime.now())
        cache.addItem_upvote_cache(posts[-1]['id'], posts[-1]['upvotes'])
    if posts:  # insert_many-style calls typically reject an empty list
        saveMultipleDocuments('test_db', "posts_collection", posts)
def post_ID_search(post_id):
    return findSingleDocument("test_db", "posts_collection", {"id": post_id})
def post_name_search(post_name):
    posts = findAllDocument("test_db", "posts_collection",
                            {"caption": {"$regex": post_name}})
    return list(posts)
def tag_ID_search(tag_id):
    """BFS from the given tag's main-tree node, collecting every post in its
    subtree. Costs one findSingleDocument query per visited node."""
    main_tree_node = findSingleDocument("test_db", "maintree_collection", {"id": tag_id})
    post_nodes = []
    queue = [main_tree_node]
    while queue:
        node = queue.pop(0)
        if node is None:  # child id with no main-tree document
            continue
        if node['type'] == 'post':
            post_nodes.append(node)
        for child in node.get('children_tags', []):
            queue.append(findSingleDocument("test_db", "maintree_collection", {"id": child}))
        for child in node.get('children_posts', []):
            queue.append(findSingleDocument("test_db", "maintree_collection", {"id": child}))
    return [findSingleDocument("test_db", "posts_collection", {"id": post['id']})
            for post in post_nodes]
# Maps node number -> path of node numbers from the root (0) to that node.
# Module-level state shared by generate_random_tag_tree and create_posts;
# generate_random_tag_tree resets it at the start of each run.
path_dict = collections.defaultdict(list)
path_dict[0] = [0]
def fetch_most_liked_posts(n):
    posts = findAllDocument("test_db", "posts_collection", {})
    posts = sorted(posts, key=lambda x: x['upvotes'], reverse=True)
    return posts[:n]

def fetch_latest_posts(n):
    posts = findAllDocument("test_db", "posts_collection", {})
    posts = sorted(posts, key=lambda x: x['time'], reverse=True)
    return posts[:n]
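
# Both fetchers above deliberately do a full collection scan followed by an
# O(n log n) sort; the simulate_cache_* functions below time them against the
# pre-sorted cache lookups.
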
def delete_db():
    deleteDatabase("test_db")
    global root_id_drive
    if root_id_drive:
        remove_folder(root_id_drive)
        root_id_drive = None
    cache.clear_cache()
def simulate_search_ID_algorithms(n):
    times = []
    no_of_ids = []
    for i in range(2, n):
        delete_db()
        test_db = createDatabase("test_db")
        # Keep the tag tree small relative to the post count on larger runs.
        if i < 50:
            generate_random_tag_tree(i)
        else:
            generate_random_tag_tree(i // 5)
        create_posts(i)
        avg = []
        for j in range(4):
            start = time.time()
            # create_posts(i) makes ids p0 .. p(i-1); sample one that exists.
            post_ID_search('p' + str(random.randrange(i)))
            end = time.time()
            avg.append(end - start)
        times.append(sum(avg) / len(avg))
        no_of_ids.append(i)
    plt.plot(no_of_ids, times)
    plt.grid(True)
    plt.xlabel('Number of posts')
    plt.ylabel('Time (s)')
    plt.title("Time taken to search for a post ID")
    plt.legend(['ID Search'])
    plt.show()
def simulate_search_name_algorithms(n):
    times = []
    no_of_posts = []
    for i in range(2, n):
        delete_db()
        test_db = createDatabase("test_db")
        if i < 50:
            generate_random_tag_tree(i)
        else:
            generate_random_tag_tree(i // 5)
        create_posts(i)  # without this the caption search runs on an empty collection
        avg = []
        for j in range(4):
            start = time.time()
            # Captions are generated as 'P<k>Caption'; pick one that exists.
            post_name_search('P' + str(random.randrange(i)) + 'Caption')
            end = time.time()
            avg.append(end - start)
        times.append(sum(avg) / len(avg))
        no_of_posts.append(i)
    plt.plot(no_of_posts, times)
    plt.grid(True)
    plt.xlabel('Number of posts')
    plt.ylabel('Time (s)')
    plt.title('Time taken to search for posts by name')
    plt.legend(['Search by name'])
    plt.show()
def simulate_cache_latest(n):
    time_cache = []
    time_no_cache = []
    no_of_posts = []
    for i in range(20, n):
        delete_db()
        test_db = createDatabase("test_db")
        generate_random_tag_tree(i)
        create_posts(i)
        avg_no_cache = []
        for j in range(4):
            start = time.time()
            fetch_latest_posts(10)
            end = time.time()
            avg_no_cache.append(end - start)
        avg_cache = []
        for j in range(4):
            start = time.time()
            # Only the cache lookup is timed; hydrating the returned ids back
            # into full documents is deliberately left out.
            post_ids = cache.getAllItems_recent_cache()
            # for post_id in post_ids:
            #     findSingleDocument("test_db","posts_collection",{"id":post_id})
            end = time.time()
            avg_cache.append(end - start)
        time_cache.append(sum(avg_cache) / len(avg_cache))
        time_no_cache.append(sum(avg_no_cache) / len(avg_no_cache))
        no_of_posts.append(i)
    plt.plot(no_of_posts, time_cache, 'r', label='Cache')
    plt.plot(no_of_posts, time_no_cache, 'b', label='No-Cache')
    plt.grid(True)
    plt.xlabel('Number of posts')
    plt.ylabel('Time (s)')
    plt.title('Time taken to fetch the top 10 latest posts\nCache vs No Cache')
    plt.legend(loc='best')
    plt.show()
def simulate_cache_most_liked(n):
    time_cache = []
    time_no_cache = []
    no_of_posts = []
    for i in range(20, n):
        delete_db()
        test_db = createDatabase("test_db")
        generate_random_tag_tree(i)
        create_posts(i)
        avg_no_cache = []
        for j in range(4):
            start = time.time()
            fetch_most_liked_posts(10)
            end = time.time()
            avg_no_cache.append(end - start)
        avg_cache = []
        for j in range(4):
            start = time.time()
            # Only the cache lookup is timed, as in simulate_cache_latest.
            post_ids = cache.getAllItems_upvote_cache()
            # for post_id in post_ids:
            #     findSingleDocument("test_db","posts_collection",{"id":post_id})
            end = time.time()
            avg_cache.append(end - start)
        time_cache.append(sum(avg_cache) / len(avg_cache))
        time_no_cache.append(sum(avg_no_cache) / len(avg_no_cache))
        no_of_posts.append(i)
    plt.plot(no_of_posts, time_cache, 'r', label='Cache')
    plt.plot(no_of_posts, time_no_cache, 'b', label='No-Cache')
    plt.grid(True)
    plt.xlabel('Number of posts')
    plt.ylabel('Time (s)')
    plt.title('Time taken to fetch the top 10 most liked posts\nCache vs No Cache')
    plt.legend(loc='best')
    plt.show()
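
# For reference, a minimal sketch of the interface this script assumes from
# CacheImpl (cache_impl.py is not shown here, so the internals below are an
# assumption, not the real implementation): capacity-bounded maps keyed by
# post id, returning ids best-first.
#
#     class CacheImpl:
#         def __init__(self, capacity):
#             self.capacity = capacity
#             self.recent = {}    # post_id -> timestamp
#             self.upvotes = {}   # post_id -> upvote count
#
#         def _add(self, table, post_id, score):
#             table[post_id] = score
#             if len(table) > self.capacity:  # evict the lowest-scoring entry
#                 table.pop(min(table, key=table.get))
#
#         def addItem_recent_cache(self, post_id, timestamp):
#             self._add(self.recent, post_id, timestamp)
#
#         def addItem_upvote_cache(self, post_id, upvotes):
#             self._add(self.upvotes, post_id, upvotes)
#
#         def getAllItems_recent_cache(self):
#             return sorted(self.recent, key=self.recent.get, reverse=True)
#
#         def getAllItems_upvote_cache(self):
#             return sorted(self.upvotes, key=self.upvotes.get, reverse=True)
#
#         def clear_cache(self):
#             self.recent.clear()
#             self.upvotes.clear()
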
root_id_drive = None
cache = CacheImpl(10)

# Start from a clean slate, then run one simulation at a time.
delete_db()
test_db = createDatabase("test_db")
simulate_search_ID_algorithms(200)
# simulate_search_name_algorithms(200)
# simulate_cache_latest(200)
# simulate_cache_most_liked(200)
# generate_random_tag_tree(100)