-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconnective_middle_frequency.py
48 lines (37 loc) · 1.64 KB
/
connective_middle_frequency.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import json
import os
# Default locations, relative to the current working directory.
DATA_DIR = "data"
OUTPUT_FILE = "data/txt/middle_connective_frequency.txt"


def _read_relations(data_dir):
    """Yield one parsed JSON object per line of every relations file.

    Only directory entries whose name contains "relations" are read; each
    non-blank line of such a file is parsed as a standalone JSON document.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # order in which connectives are first seen (and therefore the order of
    # tied counts in the output) reproducible between runs.
    for filename in sorted(os.listdir(data_dir)):
        if "relations" not in filename:
            continue
        with open(os.path.join(data_dir, filename), 'r') as handle:
            for line in handle:
                if line.strip():  # tolerate blank / trailing empty lines
                    yield json.loads(line)


def _is_middle_connective(relation):
    """Return True when the connective lies strictly between Arg1 and Arg2.

    The test is: end of Arg1 < start of connective, and end of connective
    < start of Arg2, using character offsets from each 'CharacterSpanList'.
    Relations with an empty span list for Arg1, Arg2 or the connective are
    treated as "not middle" (some relations carry no connective span,
    presumably dataset errors or connective-less relation types).
    """
    arg1_spans = relation['Arg1']['CharacterSpanList']
    arg2_spans = relation['Arg2']['CharacterSpanList']
    conn_spans = relation['Connective']['CharacterSpanList']
    if not (arg1_spans and arg2_spans and conn_spans):
        return False

    arg1_end = arg1_spans[-1][1]
    # NOTE(review): the start of Arg2 and both connective offsets are taken
    # from the LAST span, as in the original script. For discontinuous
    # arguments/connectives the first span may have been intended -- confirm
    # before changing, since it alters the counts.
    arg2_start = arg2_spans[-1][0]
    conn_start = conn_spans[-1][0]
    conn_end = conn_spans[-1][1]
    return arg1_end < conn_start and conn_end < arg2_start


def count_middle_connectives(relations):
    """Count lower-cased connective texts that sit between Arg1 and Arg2.

    relations: iterable of relation dicts (with 'Type', 'Arg1', 'Arg2' and
        'Connective' keys). Relations whose 'Type' is 'Implicit' are ignored.

    Returns a dict mapping each lower-cased connective 'RawText' to its
    frequency, plus a 'TOTAL' key holding the number of middle connectives
    (absent when none were found).
    """
    counts = {}
    for relation in relations:
        if relation['Type'] == 'Implicit':
            continue
        if not _is_middle_connective(relation):
            continue
        text = relation['Connective']['RawText'].lower()
        counts['TOTAL'] = counts.get('TOTAL', 0) + 1
        counts[text] = counts.get(text, 0) + 1
    return counts


def _write_frequencies(counts, output_file):
    """Write "<connective>: <count>" lines to output_file, rarest first."""
    out_dir = os.path.dirname(output_file)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    # Mode 'w' truncates any previous contents, so a single handle replaces
    # the original "open to clear, then reopen to append" pair.
    with open(output_file, 'w') as out:
        for connective in sorted(counts, key=counts.get):
            out.write(connective + ": " + str(counts[connective]) + "\n")


def main(data_dir=DATA_DIR, output_file=OUTPUT_FILE):
    """Count middle connectives under data_dir and write them to output_file.

    Returns the frequency dict that was written.
    """
    counts = count_middle_connectives(_read_relations(data_dir))
    _write_frequencies(counts, output_file)
    return counts


if __name__ == "__main__":
    main()