# Copyright Software Engineering Analytics Lab (SEAL), Wayne State University, 2022
# Authors: Jaydeb Sarker <jaydebsarker@wayne.edu> and Amiangshu Bosu <abosu@wayne.edu>
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# version 3 as published by the Free Software Foundation.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
from ToxiCR import ToxiCR
# The best configuration for each algorithm is listed below.
# Pretrained models for the best configurations are also included.

# Random Forest (RF), recommended if you do not have a GPU
# toxicClassifier = ToxiCR(ALGO="RF", count_profanity=True, remove_keywords=False, split_identifier=False,
#                          embedding="tfidf", load_pretrained=True)

# Bidirectional LSTM (biLSTM)
# toxicClassifier = ToxiCR(ALGO="biLSTM", count_profanity=True, remove_keywords=False, split_identifier=True,
#                          embedding="fasttext", load_pretrained=True)

# LSTM
# toxicClassifier = ToxiCR(ALGO="LSTM", count_profanity=True, remove_keywords=True, split_identifier=True,
#                          embedding="glove", load_pretrained=True)

# Gated Recurrent Unit (GRU)
# toxicClassifier = ToxiCR(ALGO="GRU", count_profanity=True, remove_keywords=False, split_identifier=True,
#                          embedding="glove", load_pretrained=True)

# Deep Pyramid Convolutional Neural Network (DPCNN)
# toxicClassifier = ToxiCR(ALGO="CNN", count_profanity=True, remove_keywords=False, split_identifier=False,
#                          embedding="fasttext", load_pretrained=True)

# Decision Tree (DT)
# toxicClassifier = ToxiCR(ALGO="DT", count_profanity=True, remove_keywords=True, split_identifier=False,
#                          embedding="tfidf", load_pretrained=True)

# Logistic Regression (LR)
# toxicClassifier = ToxiCR(ALGO="LR", count_profanity=True, remove_keywords=True, split_identifier=False,
#                          embedding="tfidf", load_pretrained=True)

# Gradient Boosting Tree (GBT)
# toxicClassifier = ToxiCR(ALGO="GBT", count_profanity=True, remove_keywords=True, split_identifier=False,
#                          embedding="tfidf", load_pretrained=True)

# Support Vector Machine (SVM)
# toxicClassifier = ToxiCR(ALGO="SVM", count_profanity=True, remove_keywords=False, split_identifier=True,
#                          embedding="fasttext", load_pretrained=True)

# BERT, the best-performing model
toxicClassifier = ToxiCR(
ALGO="BERT",
count_profanity=False,
remove_keywords=True,
split_identifier=False,
embedding="tfidf",
load_pretrained=True
)
toxicClassifier.init_predictor()
sentences = [
"go fuck yourself",
"this is crap",
"thank you for the information",
"yeah that sucked, fixed, Done.",
"Crap, this is an artifact of a previous revision. It's simply the last time a change was made to Tuskar's cloud.",
"Ah damn I misread the bug -_-",
"wtf...",
"I appreciate your help.",
"fuuuuck",
"what the f*ck",
"absolute shit",
"Get the hell outta here",
"shi*tty code",
"you are an absolute b!tch",
"Nothing particular to worry about",
"You need to kill the process for it to work"
]
results = toxicClassifier.get_toxicity_probability(sentences)
for sentence, probability in zip(sentences, results):
    print(f'"{sentence}" -> {probability}')  # probability of being toxic