forked from rahulguptakota/paper-To-Reviewer-Matching-System
-
Notifications
You must be signed in to change notification settings - Fork 0
/
features_k7.py
47 lines (40 loc) · 1.27 KB
/
features_k7.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import sys
from os import listdir
from os.path import isfile, join
from nltk.tokenize import sent_tokenize
import nltk
import xml.etree.ElementTree as ET
import csv
# Root directory holding one sub-directory per paper; each sub-directory is
# expected to contain the paper's parsed .xml file.
DATA_DIR = "data/"
# CSV of citation pairs; column 0 is looked up in the scores, column 1 is the
# key of the citation graph. The first row is skipped as a header.
INDEX_FILE = "index.csv"
# Output file: one "<col0> <col1> <score>" line per usable index row.
OUTPUT_FILE = "features_k7.txt"


def find_paper_xml(data_dir=DATA_DIR):
    """Map each paper id (sub-directory name of *data_dir*) to one .xml file.

    Entries of *data_dir* that are not directories are ignored, as are
    sub-directories without any ``.xml`` file. When a sub-directory holds
    several ``.xml`` files the alphabetically first one is used, so the
    choice is reproducible across runs and platforms.

    Returns:
        dict mapping paper id -> path of the selected .xml file.
    """
    papers = {}
    for paper_id in sorted(listdir(data_dir)):
        paper_dir = join(data_dir, paper_id)
        try:
            entries = listdir(paper_dir)
        except NotADirectoryError:
            # Stray file directly under the data directory: not a paper.
            continue
        xml_files = sorted(
            join(paper_dir, name)
            for name in entries
            if name.endswith(".xml") and isfile(join(paper_dir, name))
        )
        if xml_files:
            papers[paper_id] = xml_files[0]
    return papers


def count_citations(xml_path):
    """Return the number of ``citation`` elements in the XML file *xml_path*.

    Raises:
        xml.etree.ElementTree.ParseError: if the file is not well-formed XML.
    """
    root = ET.parse(xml_path).getroot()
    return sum(1 for _ in root.iter('citation'))


def inverse_reference_counts(papers):
    """Compute ``1 / number_of_citations`` for every paper in *papers*.

    Args:
        papers: dict mapping paper id -> .xml path (see ``find_paper_xml``).

    Returns:
        dict mapping paper id -> inverse reference count (float). Papers
        whose XML contains no ``citation`` element get no entry, because the
        inverse is undefined; a warning is written to stderr instead.
    """
    scores = {}
    for paper_id, xml_path in papers.items():
        reference_count = count_citations(xml_path)
        if reference_count == 0:
            print("warning: no citations found in", xml_path,
                  "- paper skipped", file=sys.stderr)
            continue
        scores[paper_id] = 1.0 / reference_count
        print(paper_id, scores[paper_id])
    return scores


def add_citation(graph, citing, cited):
    """Record in *graph* that *citing* cites *cited*, ignoring duplicates.

    *graph* maps a cited id to the list of ids citing it, in first-seen
    order. An already-recorded pair leaves the existing list untouched.
    """
    citers = graph.setdefault(cited, [])
    if citing not in citers:
        citers.append(citing)


def write_features(index_path, scores, out_file):
    """Write one feature line per citation pair listed in *index_path*.

    Every row with at least two fields is echoed to stdout. The first row is
    treated as a header: it is neither written nor added to the graph. For
    each remaining row a line ``"<col0> <col1> <score of col0>\\n"`` is
    written to *out_file*; rows whose first column has no entry in *scores*
    are reported on stderr and skipped.

    Args:
        index_path: path of the CSV file (``,`` delimiter, ``|`` quote char).
        scores: dict mapping paper id -> inverse reference count.
        out_file: writable text file object.

    Returns:
        dict mapping each second-column id to the list of distinct
        first-column ids seen with it.
    """
    graph = {}
    with open(index_path, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        for row_number, row in enumerate(reader):
            if len(row) < 2:
                # Blank or truncated line: nothing to relate.
                continue
            citing, cited = row[0], row[1]
            print(citing, cited)
            if row_number == 0:
                continue
            add_citation(graph, citing, cited)
            if citing not in scores:
                print("warning: no reference count for", citing,
                      "- row skipped", file=sys.stderr)
                continue
            out_file.write(
                citing + " " + cited + " " + str(scores[citing]) + "\n")
    return graph


def main():
    """Build features_k7.txt from the papers under data/ and index.csv."""
    # The output file is opened first, so it is truncated even if a later
    # step fails; the context manager guarantees it is flushed and closed.
    with open(OUTPUT_FILE, 'w') as out_file:
        scores = inverse_reference_counts(find_paper_xml(DATA_DIR))
        write_features(INDEX_FILE, scores, out_file)


if __name__ == "__main__":
    main()
    sys.exit()