-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclean.py
34 lines (25 loc) · 867 Bytes
/
clean.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import re
# Default locations used when the script is run directly.
SEMEVAL_DEV_PATH = 'data/SemEval2015-dev.tsv'
EXTRA_DEV_PATH = 'data/dev.csv'
CLEAN_PATH = "data/SemEval2015-dev-clean.tsv"

# Marker found in the 4th field of rows that must be dropped.
NOT_AVAILABLE = 'Not Available'

# Fields are separated by runs of one or more tabs.
_TAB_RUN = re.compile(r'\t+')

# Both inputs are indexed up to field 3, so shorter rows cannot be processed.
_MIN_FIELDS = 4


def keep_available(lines):
    """Return the lines whose 4th tab-separated field is not 'Not Available'.

    Args:
        lines: iterable of raw text lines (with or without trailing newline).

    Returns:
        A list of the kept lines, each guaranteed to end with a newline so
        that rows appended after them never get glued onto the last one.
        Lines with fewer than four fields (e.g. blank lines) are skipped
        instead of raising IndexError.
    """
    kept = []
    for line in lines:
        fields = _TAB_RUN.split(line)
        if len(fields) < _MIN_FIELDS:
            continue
        if fields[3].strip() != NOT_AVAILABLE:
            kept.append(line if line.endswith('\n') else line + '\n')
    return kept


def missing_rows(lines, known_ids):
    """Build output rows for input lines whose first field is not yet known.

    Each produced row has the layout
    ``<field 0>\\t1\\t<field 3, stripped>\\t<field 1, stripped>\\t\\n``;
    the second column is always the literal ``'1'``.
    NOTE(review): the meaning of the individual columns is not visible in
    this script -- confirm against the data files.

    Args:
        lines: iterable of raw tab-separated text lines.
        known_ids: container of already-present first-field values
            (whitespace-stripped); a set gives O(1) lookups.

    Returns:
        A list of formatted rows, in input order. Lines with fewer than four
        fields are skipped instead of raising IndexError.
    """
    rows = []
    for line in lines:
        fields = _TAB_RUN.split(line)
        if len(fields) < _MIN_FIELDS:
            continue
        if fields[0].strip() not in known_ids:
            rows.append("%s\t%s\t%s\t%s\t\n" % (
                fields[0], '1', fields[3].strip(), fields[1].strip()))
    return rows


def main(semeval_path=SEMEVAL_DEV_PATH, extra_path=EXTRA_DEV_PATH,
         clean_path=CLEAN_PATH):
    """Write the cleaned dev file.

    1. Copy every row of ``semeval_path`` whose 4th field is not
       'Not Available'.
    2. Append a reformatted row for every row of ``extra_path`` whose first
       field does not already appear among the copied rows.

    Every file is opened in a ``with`` block so it is flushed and closed
    deterministically, and the output is written in text mode in one pass
    (no write-then-reopen of a still-buffered file).
    """
    with open(semeval_path, "r") as source:
        kept = keep_available(source)

    # First field of every kept row; stripped to match the lookup below.
    known_ids = {_TAB_RUN.split(line)[0].strip() for line in kept}

    with open(extra_path, "r") as source:
        extra = missing_rows(source, known_ids)

    with open(clean_path, 'w') as out:
        out.writelines(kept)
        out.writelines(extra)


if __name__ == "__main__":
    main()