-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtrainmodel.pl
126 lines (103 loc) · 3.61 KB
/
trainmodel.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
use Config::Simple;
use Getopt::Long;
use File::Basename;
# trainmodel.pl - train classifier model(s) from a feature matrix file.
#
# Options (parsed with Getopt::Long below):
#   -Features <file>    feature matrix to train on (required)
#   -model <file>       model file to write; defaults to the -Features path
#   -classifier <name>  svmperftfidf, liblogistic or all (default: all)
#   -dffilter <number>  passed through to makeWordlistIDFAndMaxFreq.pl (default: 0)
#   -Quick / -noQuick   accepted, but not used anywhere in this script
#
# Example arguments:
# -Features "E:\Amazon\groceriestest\matrices\groceriestest-.ConcatPOS.Dict.BothNEG.BiGram.NoStem-DocMatrix.txt" -model "E:\Amazon\groceriestest\models\groceriestest-.ConcatPOS.Dict.BothNEG.BiGram.NoStem-DocModel.txt" -classifier svmperftfidf -dffilter 0
#
# Variables assigned without `my` are package globals on purpose:
# runAllCommands() reads the command strings, file names, $model, $matrix and
# $dffilter, and writes to the OUT filehandle opened here.

$svmperfFlag = 1;       # set to 0 to skip svm_perf_learn even when it is present
$classifier  = "all";
$dffilter    = 0;

$r = GetOptions (
    "Features=s"   => \$matrix,
    "model=s"      => \$model,
    "classifier=s" => \$classifier,
    "Quick!"       => \$quickFlag,
    "dffilter=f"   => \$dffilter
);

# Without a feature matrix every derived file name would be built from an
# empty string, so fail early with a usage hint.
if (!defined($matrix) || !length($matrix)) {
    die "Usage: perl trainmodel.pl -Features <matrix file> [-model <model file>] "
      . "[-classifier svmperftfidf|liblogistic|all] [-dffilter <number>] [-Quick]\n";
}

$classifier = lc($classifier);
if ($classifier ne "svmperftfidf" && $classifier ne "liblogistic" && $classifier ne "all") {
    die "unknown classifier '$classifier' (expected svmperftfidf, liblogistic or all)\n";
}

$logfile = $matrix.".LOG.txt";
open(OUT, '>', $logfile) or die "can't open logfile $logfile: $!";

# A missing (or one-character) model name means "store the model next to the matrix".
if (!defined($model) || length($model) < 2) {
    $model = "$matrix";
}

# Decide which trainers to run. svm_perf_learn is only used when a file of
# that name exists in the current directory; otherwise liblogistic is trained
# in its place.
my $trainAll    = ($classifier eq "all");
my $haveSvmperf = (-e "svm_perf_learn") && $svmperfFlag;
my @trainers;
if ($classifier eq "svmperftfidf" || $trainAll) {
    if ($haveSvmperf) {
        push @trainers, "svmperftfidf";
    }
    else {
        print OUT "svm_perf_learn not available; training liblogistic instead.\n\n";
        $classifier = "liblogistic" unless $trainAll;
    }
}
if ($classifier eq "liblogistic" || $trainAll) {
    push @trainers, "liblogistic";
}

foreach my $trainer (@trainers) {
    # With "all" each trainer gets its own model file, named after the
    # trainer that actually produced it; otherwise the model name is used as given.
    $modelc = $trainAll ? "$model.$trainer.model.txt" : $model;

    $inputfile    = $matrix;
    $wordlistfile = $modelc."WordList.txt";
    $SVMfileIDF   = $inputfile."SVMLightIDF.txt";
    $SVMfileMAX   = $inputfile."SVMLightMAX.txt";

    $wordlistcommand = "perl makeWordlistIDFAndMaxFreq.pl \"$inputfile\" $dffilter \"$wordlistfile\"";
    $inputcommand    = "perl makeSVMLightFile.pl \"$inputfile\" \"$wordlistfile\" 0 \"$SVMfileIDF\" \"$SVMfileMAX\"";
    if ($trainer eq "svmperftfidf") {
        $modelcommand = "svm_perf_learn -c 100 -l 10 -w 3 \"$SVMfileIDF\" \"$modelc\" ";
    }
    else {
        $modelcommand = "train.exe -s 7 -c 100 -e 0.1 \"$SVMfileIDF\" \"$modelc\" ";
    }
    $checkcommand = "perl checkmodel.pl \"$inputfile\" \"$modelc\" ";

    runAllCommands();
}

close(OUT) or warn "can't close logfile $logfile: $!";
exit;
# Run the four training steps prepared by the main script and record them.
#
# Reads the package globals $wordlistcommand, $inputcommand, $modelcommand and
# $checkcommand, runs each through runCommand() in that order, and writes every
# command followed by its own output to the OUT log handle.
#
# If "<matrix>.cfg" exists it is then updated (via Config::Simple) with the
# file names and settings used for this run: wordlistfile, SVMfileIDF,
# SVMfileMAX, model and dffilter.
#
# Takes no arguments and returns nothing useful.
sub runAllCommands {
    my $wordlistresult = runCommand($wordlistcommand);
    my $inputresult    = runCommand($inputcommand);
    my $modelresult    = runCommand($modelcommand);
    my $checkresult    = runCommand($checkcommand);

    # Each command is logged directly above the output it produced.
    print OUT "$wordlistcommand\n$wordlistresult\n\n",
              "$inputcommand\n$inputresult\n\n",
              "$modelcommand\n$modelresult\n\n",
              "$checkcommand\n$checkresult\n";
    print "Done training $model.\n";

    my $cfgfile = "$matrix.cfg";
    if (-e $cfgfile) {
        # Config::Simple->new returns undef when the file cannot be read or
        # parsed; report that instead of failing on a method call on undef.
        my $cfg = Config::Simple->new($cfgfile);
        if (!$cfg) {
            die "can't read config file $cfgfile: " . Config::Simple->error() . "\n";
        }
        $cfg->autosave(1);
        $cfg->param("wordlistfile", $wordlistfile);
        $cfg->param("SVMfileIDF", $SVMfileIDF);
        $cfg->param("SVMfileMAX", $SVMfileMAX);
        $cfg->param("model", $model);
        $cfg->param("dffilter", $dffilter);
        $cfg->write()
            or warn "can't write config file $cfgfile: " . Config::Simple->error() . "\n";
    }
    return;
}
# Run a shell command and return what it wrote to standard output.
#
# If the command invokes a Perl script ("perl <name>.pl ...") and a file
# "<name>.exe" exists, the "perl <name>.pl" part is replaced by that
# executable before the command is run. Building the executable on demand
# with perl2exe was already disabled in this script and is not attempted.
#
# Parameters:
#   $command - the complete command line, passed to the shell via backticks.
# Returns:
#   the captured standard output of the command.
sub runCommand {
    my $command = shift;

    # The dot in ".pl" is escaped and followed by \b so that only a real
    # ".pl" extension ends the script name; unescaped, "perl sample.pl" was
    # read as script "sampl" with base name "sa".
    if ($command =~ /(perl\s+((.*?)\.pl\b))/i) {
        my $subst = $1;
        my $exe   = $3 . ".exe";
        if (-e $exe) {
            # \Q...\E: the matched text is replaced literally, so dots and
            # backslashes in a script path are not treated as regex syntax.
            $command =~ s/\Q$subst\E/$exe/ig;
        }
    }

    my $ret = `$command`;
    return $ret;
}