-
Notifications
You must be signed in to change notification settings - Fork 7
/
scmvGraphCompare.py
120 lines (98 loc) · 3.63 KB
/
scmvGraphCompare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import argparse,csv
import networkx as nx
import operator
parser = argparse.ArgumentParser(description='Compare two graphs')
parser.add_argument('-filestub',help='If there is a common prefix in the file paths add it here')
parser.add_argument('-filesuffix',help='If there is a common suffix in the file paths add it here')
parser.add_argument('-files',nargs='*',help='gdf files you want to compare')
parser.add_argument('-labels',nargs='*',help='Labels in same order as gdf files you want to compare')
parser.add_argument('-outpath',help='Filename path for output files')
parser.add_argument('-compare',help='If there is a common prefix in the file paths add it here')
args=parser.parse_args()
if len(args.files)!=len(args.labels): exit(-1)
path=args.outpath
labels=args.labels
if args.filestub!=None: filestub=args.filestub
else: filestub=''
if args.filesuffix!=None: filesuffix=args.filesuffix
else: filesuffix=''
def getCountsFilename(path,label):
return path+'/counts_'+label+'.csv'
def getCountsCSVwriter(path,label):
fn=getCountsFilename(path,label)
f=open(fn,'ab+')
return f,csv.writer(f)
for label in labels:
fn=getCountsFilename(path,label)
f=open(fn,'wb+')
writer=csv.writer(f)
writer.writerow(['userid','username','indegree','outdegree','inNorm','fo_count','normaliser'])
f.close()
inUsers={}
graphs={}
print filestub,args.files
for i in range(0,len(args.files)):
label=labels[i]
fn=filestub+args.files[i]+filesuffix
print fn
graphs[label]={'graph':nx.read_graphml(fn),'name':label}
print args.files[i]
#DG=nx.read_graphml(args.files[i])
#label=args.labels[i]
#for node in DG.nodes():
# writer.writerow([label,node,DG.node[node]['label'],DG.in_degree(node),DG.out_degree(node)])
for graph in graphs:
graphs[graph]['inNodes']=[]
graphs[graph]['outnodecount']=0
DG=graphs[graph]['graph']
for node in DG.nodes():
if DG.in_degree(node)>0:
user=DG.node[node]['label']
graphs[graph]['inNodes'].append(user)
if user not in inUsers:
inUsers[user]={'fo_count':DG.node[node]['fo_count']}
if DG.out_degree(node)>0: graphs[graph]['outnodecount']=graphs[graph]['outnodecount']+1
for user in inUsers:
for graph in graphs:
inUsers[user][graph]={'incount':0,'normcount':0}
data={}
for graph in graphs:
print "outnodecount",graphs[graph]['outnodecount'],
data[graph]=set(graphs[graph]['inNodes'])
DG=graphs[graph]['graph']
f,writer=getCountsCSVwriter(path,graph)
for node in DG.nodes():
user=DG.node[node]['label']
if user in inUsers:
inUsers[user][graph]['incount']=DG.in_degree(node)
inUsers[user][graph]['normcount']=float(DG.in_degree(node))/float(graphs[graph]['outnodecount'])
writer.writerow([node,DG.node[node]['label'],DG.in_degree(node),DG.out_degree(node),float(DG.in_degree(node))/float(graphs[graph]['outnodecount']),int(DG.node[node]['fo_count']),int(graphs[graph]['outnodecount'])])
f.close()
if args.compare=='false':exit(-1)
#based on O'reilly Mining Social Web p. 157
#intersection=set()
keys=data.keys()
#for k in range(len(keys)-1):
# #THE FOLLOWING DOESN'T FIND THE INTERSECTION ACROSS ALL SETS?
# intersection=data[keys[k]].intersection(data[keys[k-1]])
sets=[]
for key in data:
sets.append(data[key])
#This seems to be the standard way of finding the intersection of several sets:
intersection=set.intersection(*sets)
msg='Common items shared amongst: %s' % ', '.join(keys).strip()
print msg
print '-' * len(msg)
fn=path+'/intersection.csv'
f=open(fn,'wb+')
writer=csv.writer(f)
txt=['name']
for graph in graphs: txt.append(graph)
writer.writerow(txt)
for i in intersection:
#print i.strip(),inUsers[i.strip()]
txt=[i]
for graph in graphs:
txt.append(inUsers[i][graph]['normcount'])
writer.writerow(txt)
f.close()