-
Notifications
You must be signed in to change notification settings - Fork 4
/
venn.py
109 lines (92 loc) · 3.25 KB
/
venn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import json
import numpy as np
from matplotlib import pyplot as plt
from matplotlib_venn import venn3, venn3_circles, venn2, venn2_circles
import glob
import os
# --- Input discovery and loading -------------------------------------------
# The data-set prefix defaults to "serverless". If exactly one
# "<prefix>-literature-base.json" file exists in the working directory, the
# prefix is taken from that file name instead.
prefix = "serverless"
basefiles = glob.glob("*-literature-base.json")
if len(basefiles) == 1:
    prefix = os.path.basename(basefiles[0]).split("-")[0]

biblio_filename = "{}-literature-bibliography.json".format(prefix)
analysis_filename = "{}-literature-analysis.json".format(prefix)

# Use context managers so every file handle is closed as soon as its JSON
# content has been parsed.
with open("scraper/searchterms.json") as searchterms_file:
    searchterms = json.load(searchterms_file)
with open(biblio_filename) as biblio_file:
    biblio = json.load(biblio_file)
with open(analysis_filename) as analysis_file:
    analysis = json.load(analysis_file)

# Example contents for the serverless data set:
# ("serverless application", "serverless computing", "serverless",
#  "function-as-a-service", "lambda", "cloud function", "faas")
terms = searchterms

# TODO! make generic
# The indices below are hard-coded against the serverless term list above.
if prefix == "serverless":
    chosenterms = (terms[1], terms[2], terms[6])
else:
    chosenterms = (terms[0],)

# --- Keyword matching ------------------------------------------------------
# For every bibliography entry, collect the search terms that occur as a
# substring of the lower-cased title. One list per entry, in entry order.
allmterms = []
for ident in biblio:
    title = biblio[ident]["title"].lower()
    mterms = [term for term in terms if term in title]
    print("keywords for {:3d}: {}".format(int(ident), mterms))
    allmterms.append(mterms)

# Count how often each combination of two or more terms occurs; the key is
# the string form of the matched-term list.
pairs = {}
for mterms in allmterms:
    if len(mterms) > 1:
        key = str(mterms)
        pairs[key] = pairs.get(key, 0) + 1

for pair in pairs:
    print("venn-able pairing: {:3d} {}".format(pairs[pair], pair))
def getsubsets(allmterms, t1, t2, t3):
    """Count entries per region of a three-set Venn diagram.

    Each element of ``allmterms`` is a collection of matched terms. An
    element is assigned to the region given by which of ``t1``, ``t2`` and
    ``t3`` it contains; elements containing none of them are ignored.

    Returns a list of seven counts ordered
    Abc, aBc, ABc, abC, AbC, aBC, ABC (lower case meaning "not in the set",
    with A = t1, B = t2, C = t3).
    """
    counts = [0] * 7
    for matched in allmterms:
        # Weight the three memberships as bits 1, 2 and 4; the resulting
        # value 1..7 is exactly the 1-based position in the ordering above.
        region = (
            1 * (t1 in matched)
            + 2 * (t2 in matched)
            + 4 * (t3 in matched)
        )
        if region:
            counts[region - 1] += 1
    return counts
# --- Three-set keyword diagram ----------------------------------------------
# Font size for the diagrams. Set before the guard below so that it is in
# effect even when the three-set diagram is skipped.
plt.rc("font", size=16)

# getsubsets() needs exactly three terms. chosenterms holds only one term for
# non-"serverless" prefixes, and unpacking it into getsubsets() would raise a
# TypeError and abort the script, so the diagram is skipped in that case.
if len(chosenterms) == 3:
    subsets_sl = getsubsets(allmterms, *chosenterms)
    print("weighted venn subsets", subsets_sl)

    plt.figure(figsize=(7, 7))
    v = venn3(subsets=subsets_sl, set_labels=chosenterms)
    # Optional styling hooks, kept for reference:
    #v.get_patch_by_id('100').set_alpha(1.0)
    #v.get_patch_by_id('100').set_color('white')
    #v.get_label_by_id('100').set_text('Unknown')
    #v.get_label_by_id('A').set_text('Set "A"')
    c = venn3_circles(subsets=subsets_sl, linestyle='dashed')
    #c[0].set_lw(1.0)
    #c[0].set_ls('dotted')
    plt.title("Literature keywords relations")
    #plt.annotate('Unknown set', xy=v.get_label_by_id('100').get_position() - np.array([0, 0.05]), xytext=(-70,-70),
    #             ha='center', textcoords='offset points', bbox=dict(boxstyle='round,pad=0.5', fc='gray', alpha=0.1),
    #             arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0.5',color='gray'))
    plt.show()
else:
    print(
        "skipping three-set keyword diagram: need 3 chosen terms, got",
        len(chosenterms),
    )
# --- Academia / industry diagram --------------------------------------------
# Tally analysis entries by their "academic" and "industry" flags:
# aca = academic only, ind = industry only, mix = both.
aca = ind = mix = 0
for ident in analysis:
    record = analysis[ident]
    is_academic = record["academic"]
    is_industry = record["industry"]
    if not (is_academic or is_industry):
        # Neither flag is set: report the entry and leave it uncounted.
        print("Error in", ident)
    elif is_academic and is_industry:
        mix += 1
    elif is_academic:
        aca += 1
    else:
        ind += 1

# The same counts feed both the filled diagram and its dashed outlines.
institution_subsets = (aca, ind, mix)
v = venn2(subsets=institution_subsets, set_labels=("academia", "industry"))
c = venn2_circles(subsets=institution_subsets, linestyle='dashed')
plt.title("Literature institution relations")
plt.show()