forked from rahulguptakota/paper-To-Reviewer-Matching-System
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsentence_split.py
35 lines (32 loc) · 893 Bytes
/
sentence_split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import sys
from os import listdir
from os.path import isfile, join
from nltk.tokenize import sent_tokenize
import nltk
from spacy.en import English
def find_first_pass_files(root="data/", suffix="_1.txt"):
    """Collect the files ending in *suffix* from each sub-directory of *root*.

    Args:
        root: Directory whose immediate sub-directories are scanned.
        suffix: Ending a file name must have to be selected.

    Returns:
        One list per sub-directory of *root* (in ``listdir`` order) holding
        the joined paths of the regular files in it whose name ends with
        *suffix*.  A sub-directory without a match yields an empty list.
    """
    # Function-scope import: ``isdir`` is not among the file's top-level
    # imports.
    from os.path import isdir

    groups = []
    for entry in listdir(root):
        subdir = join(root, entry)
        if not isdir(subdir):
            # A stray regular file directly under *root* would make
            # ``listdir(subdir)`` raise, so it is skipped.
            continue
        groups.append([
            join(subdir, name)
            for name in listdir(subdir)
            if isfile(join(subdir, name)) and name.endswith(suffix)
        ])
    return groups


def main():
    """Print the sentences of the first matching document under ``data/``.

    The document text is run through the spaCy ``English`` pipeline and
    every sentence it yields is printed with its own ``print`` call.  Only
    the first document found is processed; the script exits right after it.
    """
    en = English()
    for group in find_first_pass_files("data/"):
        if not group:
            # No matching file in this directory; indexing ``group[0]``
            # would raise IndexError.
            continue
        # Only the first match of a directory is used.  The file is only
        # read, so it is opened read-only and closed by the ``with`` block
        # before the process exits.
        with open(group[0], "r") as fd:
            data = fd.read()
        doc = en(data)
        for sentence in doc.sents:
            print(sentence.string)
        # NOTE: the script has always stopped after the first document.  The
        # statements that used to follow this exit (paragraph splitting,
        # TextTiling, writing a "*_1.txt" output file) were unreachable and
        # referenced the undefined names ``text`` and ``normal``, so they
        # were removed.
        sys.exit()


if __name__ == "__main__":
    main()