-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmain.py
61 lines (58 loc) · 2.42 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import codecs
import glob
import json
from bs4 import BeautifulSoup as BS
# Scan exported conversation pages for messages written by the archive owner
# and sort the interesting ones into themed buckets, saved to data.json.

# (lower-case needle, OUTPUT key) pairs, checked in this order.  The needles
# must be lower-case because message text is lower-cased before matching; the
# original "I am" needle could never match for exactly that reason.
CATEGORY_NEEDLES = (
    ("i feel", "feelings"),
    ("love", "love"),
    ("life", "life"),
    ("friends", "friends"),
    ("i am", "Myself"),
)

# Progress line printed (followed by the bucket size) whenever a message is
# filed under the given OUTPUT key.
CATEGORY_ANNOUNCEMENTS = {
    "feelings": "Found a feeling. Appending Feeling #",
    "love": "Found love. Appending Love #",
    "life": "Found life lesson #",
    "friends": "Found message about friendship #",
    "Myself": "Found reference to self #",
}

# A message is only considered when its length lies strictly between these.
MIN_MESSAGE_CHARS = 100
MAX_MESSAGE_CHARS = 1250

# Conversations need more than this many "message" divs to be analysed.
MIN_CONVERSATION_MESSAGES = 120

# The original loop stopped once its counter exceeded 50, i.e. after at most
# 51 files.  NOTE(review): presumably a debugging cap -- confirm before
# raising or removing it.
MAX_FILES = 51


def matching_categories(message_text):
    """Return the OUTPUT keys whose keyword occurs in *message_text*.

    Matching is case-insensitive.  Messages whose lower-cased length is not
    strictly between MIN_MESSAGE_CHARS and MAX_MESSAGE_CHARS match nothing.
    Keys are returned in CATEGORY_NEEDLES order.
    """
    lowered = message_text.lower()
    if not MIN_MESSAGE_CHARS < len(lowered) < MAX_MESSAGE_CHARS:
        return []
    return [key for needle, key in CATEGORY_NEEDLES if needle in lowered]


def load_soup(path):
    """Parse the HTML file at *path* and return the soup.

    The handle is closed as soon as parsing finishes, so scanning many
    conversation files does not accumulate open file descriptors.
    """
    with codecs.open(path, 'r') as handle:
        return BS(handle, 'html.parser')


def collect_own_messages(soup, myname, conversationwith, output):
    """File every matching message authored by *myname* into *output*.

    For each text node equal to *myname*, the next <span> after its parent is
    taken as the message metadata, and the next <p> after its great-grandparent
    as the message body.  Occurrences where that structure is missing are
    skipped instead of aborting the whole run.
    """
    for me in soup.find_all(text=myname):
        parentspan = me.parent
        meta = parentspan.findNext('span')
        msgheaderdiv = parentspan.parent
        if msgheaderdiv is None or msgheaderdiv.parent is None:
            continue
        sweetp = msgheaderdiv.parent.findNext('p')
        if sweetp is None or meta is None:
            continue
        body = sweetp.get_text()
        for key in matching_categories(body):
            # A fresh dict per bucket, so entries never share state.
            output[key].append(
                {"to": conversationwith, "msg": body, "meta": meta.get_text()}
            )
            print(CATEGORY_ANNOUNCEMENTS[key], len(output[key]))


def main():
    """Scan ../messages/*.html and write the categorised messages to data.json."""
    files = sorted(glob.glob("../messages/*.html"))
    output = {"feelings": [], "love": [], "life": [], "friends": [], "Myself": []}

    # The owner's name is the friends page title minus its " - Friends" suffix.
    namesoup = load_soup("../html/friends.htm")
    myname = namesoup.title.get_text().replace(" - Friends", "")

    for path in files[:MAX_FILES]:
        soup = load_soup(path)
        if soup.title is None:
            continue
        ti = soup.title.get_text()
        # Titles containing a comma or "Facebook" are skipped.
        # NOTE(review): presumably group chats / unnamed users -- confirm.
        if "," in ti or "Facebook" in ti:
            continue
        number_of_messages = len(soup.find_all("div", class_="message"))
        if number_of_messages <= MIN_CONVERSATION_MESSAGES:
            continue
        print("Analysing", ti)
        conversationwith = ti.replace('Conversation with ', '')
        collect_own_messages(soup, myname, conversationwith, output)

    with open('data.json', 'w') as fout:
        json.dump(output, fout)


if __name__ == "__main__":
    main()