-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSpamFilterMain.java
executable file
·88 lines (74 loc) · 3.05 KB
/
SpamFilterMain.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
/**
 * Command-line driver for the spam filter.
 *
 * <p>Locates the training and testing files under a {@code data} directory
 * in the current working directory, hands the training files to
 * {@code NaiveBayes.train} and then passes the test files to
 * {@code NaiveBayes.classify}.
 *
 * <p>Layout read by this class:
 * <ul>
 *   <li>{@code data/train/ham} - training ham files</li>
 *   <li>any other sub-directory of {@code data/train} - training spam files</li>
 *   <li>any other sub-directory of {@code data} - test files</li>
 * </ul>
 * Regular files sitting next to those directories (for example OS metadata
 * files such as {@code .DS_Store}) are ignored.
 */
public class SpamFilterMain {

    /**
     * Loads the data files, trains the classifier and classifies the test set.
     *
     * @param args command-line arguments; not used
     * @throws IOException declared so I/O failures are propagated to the caller
     */
    public static void main(String[] args) throws IOException {
        // Print current working directory path.
        // IMPORTANT: the "data/" directory is required to be in the same
        // directory as where this file is executed. Check the
        // console output if you're unsure.
        String cwd = Paths.get(".").toAbsolutePath().normalize().toString();
        System.out.println("# Current working directory (CWD): " + cwd);

        File dataDir = new File("data");
        if (!dataDir.exists() || !dataDir.isDirectory()) {
            System.out.println("# Could not find the data directory. " +
                    "Make sure it is under the CWD printed above.");
            return;
        }

        File[] trainHams = null;
        File[] trainSpams = null;
        File[] tests = null;

        // Traverse the data directory to get training and testing files.
        // Note: Avoiding any sort of path hardcoding (eg forward or backward
        // slashes) to accommodate any OS.
        File[] subDirs = dataDir.listFiles();
        // listFiles() returns null on an I/O error; in that case nothing is
        // loaded and sanityCheck() below reports the failure.
        if (subDirs != null) {
            for (File dir : subDirs) {
                // Skip stray regular files: calling listFiles() on them yields
                // null, which would otherwise clobber an already-loaded array.
                if (!dir.isDirectory()) {
                    continue;
                }
                if (dir.getName().equals("train")) {
                    File[] trainDirs = dir.listFiles();
                    if (trainDirs == null) {
                        continue; // unreadable train directory
                    }
                    for (File trainDir : trainDirs) {
                        if (!trainDir.isDirectory()) {
                            continue;
                        }
                        if (trainDir.getName().equals("ham")) {
                            trainHams = trainDir.listFiles();
                        } else { // spams
                            trainSpams = trainDir.listFiles();
                        }
                    }
                } else { // test
                    tests = dir.listFiles();
                }
            }
        }

        if (sanityCheck(trainHams, trainSpams, tests)) {
            System.out.println("# Testing/training files loaded successfully.");
            NaiveBayes nb = new NaiveBayes();
            System.out.println("# Training...");
            nb.train(trainHams, trainSpams);
            System.out.println("# Done training.");
            System.out.println("# Test results:");
            nb.classify(tests);
        }
    }

    /**
     * Performs simple sanity checks on the training/testing files loaded
     * before passing them into NaiveBayes.
     *
     * <p>Each array must be non-null and non-empty. Every failing array gets
     * its own message on standard output, so all problems are reported in a
     * single run rather than stopping at the first one.
     *
     * @param trainHams  training ham files; may be {@code null}
     * @param trainSpams training spam files; may be {@code null}
     * @param tests      test files; may be {@code null}
     * @return {@code true} if all three arrays are non-null and non-empty,
     *         {@code false} otherwise
     */
    public static boolean sanityCheck(File[] trainHams, File[] trainSpams, File[] tests) {
        boolean pass = true;
        if (trainHams == null || trainHams.length == 0) {
            System.out.println("# Error loading training ham files.");
            pass = false;
        }
        if (trainSpams == null || trainSpams.length == 0) {
            System.out.println("# Error loading training spam files.");
            pass = false;
        }
        if (tests == null || tests.length == 0) {
            System.out.println("# Error loading testing files.");
            pass = false;
        }
        return pass;
    }
}