-
Notifications
You must be signed in to change notification settings - Fork 0
/
detector.py
47 lines (37 loc) · 1.01 KB
/
detector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pickle as c
import os
from collections import Counter
from sklearn import *
def load(clf_file):
    """Load a pickled classifier from *clf_file* and return it.

    Args:
        clf_file: Path to a file containing a pickled object.

    Returns:
        The object stored in the pickle file.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the file is not a valid pickle stream.
    """
    # Pickle streams are binary; opening in text mode makes pickle.load fail
    # on Python 3, so the file must be opened with 'rb'.
    # NOTE: unpickling can execute arbitrary code. Only load files you trust.
    with open(clf_file, 'rb') as fp:
        return c.load(fp)
def make_dict(direc='emails/', top_n=3000):
    """Build the vocabulary of the most frequent words in a directory of emails.

    Every entry in *direc* is read as Latin-1 text and split on single space
    characters; only purely alphabetic tokens (``str.isalpha``) are counted.
    A countdown of the files left to process is printed to stdout.

    Args:
        direc: Directory holding the email files. Defaults to ``'emails/'``.
        top_n: Maximum number of entries to return. Defaults to 3000.

    Returns:
        A list of ``(word, count)`` tuples, most frequent first, as produced
        by ``Counter.most_common``.

    Raises:
        OSError: If *direc* cannot be listed or one of its entries cannot be
            opened as a file.
    """
    files = os.listdir(direc)
    counts = Counter()
    # Count down from the number of files so progress is visible on stdout.
    for remaining, name in zip(range(len(files), 0, -1), files):
        # Latin-1 maps every byte to a character, so decoding never fails.
        # The context manager closes each file instead of leaking the handle.
        with open(os.path.join(direc, name), encoding='latin-1') as fh:
            blob = fh.read()
        # Split on a single space only (not on all whitespace): a token that
        # spans a line break contains '\n' and is rejected by isalpha().
        # NOTE(review): keep this tokenization in step with whatever built the
        # training features for the model - confirm before changing it.
        counts.update(word for word in blob.split(' ') if word.isalpha())
        print(remaining)
    return counts.most_common(top_n)
# Restore the trained classifier from disk.
# NOTE: unpickling can execute arbitrary code; only use a model file you trust.
with open('model_pickle','rb') as model_file:
    clf = c.load(model_file)

# Vocabulary of (word, count) pairs; its order defines the feature columns.
vocabulary = make_dict()

# Read one message from the user.
message = input('>')

# One feature per vocabulary word: how often it occurs in the message.
# str.count counts non-overlapping substring matches, so a short word is also
# counted inside longer words.
# NOTE(review): confirm this matches how the training features were built.
features = [message.count(entry[0]) for entry in vocabulary]

# predict expects a 2-D input, hence the single-row wrapper list.
prediction = clf.predict([features])

# The predicted class (0 or 1) indexes into the label names.
labels = ['ham','spam']
print(labels[prediction[0]])