forked from stamatak/standard-RAxML
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathProteinModelSelection.pl
114 lines (93 loc) · 3.19 KB
/
ProteinModelSelection.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env perl
#print $ARGV[0]." ".$ARGV[1]." ".$#ARGV."\n";
#adapt as required, modify to read ./raxmHPC if raxml is not in your path
$raxmlExecutable = "raxmlHPC-AVX";
# $raxmlExecutable = "raxmlHPC-SSE3";
# $raxmlExecutable = "raxmlHPC-PTHREADS -T 4";
# $raxmlExecutable = "raxmlHPC-PTHREADS-AVX -T 4";
# $raxmlExecutable = "raxmlHPC-PTHREADS-SSE3 -T 4";
if(($#ARGV < 0) || ($#ARGV > 1))
{
print "ERROR: usage: \"perl ProteinModelSelection.pl alignmentFileName [modelFileName] \"\n";
exit;
}
$alignmentName = $ARGV[0];
$UNLIKELY = -1.0E300;
sub getLH
{
my $fileID = $_[0];
open(CPF, $fileID);
my @lines = <CPF>;
close(CPF);
my $numIT = @lines;
my $lastLH = pop(@lines);
my $k = index($lastLH, '-');
my $LH = substr($lastLH, $k);
return $LH;
}
sub getTIME
{
my $fileID = $_[0];
open(CPF, $fileID);
my @lines = <CPF>;
close(CPF);
my $numIT = @lines;
my $lastLH = pop(@lines);
my $k = index($lastLH, '-');
my $TIME = substr($lastLH, 0, $k-1);
return $TIME;
}
@AA_Models = ("DAYHOFF", "DCMUT", "JTT", "MTREV", "WAG", "RTREV", "CPREV", "VT", "BLOSUM62", "MTMAM",
"LG", "MTART", "MTZOA", "PMB", "HIVB", "HIVW", "JTTDCMUT", "FLU",
"DAYHOFFF", "DCMUTF", "JTTF", "MTREVF", "WAGF", "RTREVF", "CPREVF", "VTF", "BLOSUM62F",
"MTMAMF", "LGF", "MTARTF", "MTZOAF", "PMBF", "HIVBF", "HIVWF", "JTTDCMUTF", "FLUF");
if($#ARGV == 1)
{
print "Splitting up multi-gene alignment\n";
$partition = $ARGV[1];
$cmd = $raxmlExecutable." -f s -m PROTCATJTT -p 12345 -s ".$alignmentName." -q ".$partition." -n SPLIT_".$alignmentName." \> SPLIT_".$alignmentName."_out";
system($cmd);
$count = 0;
while(open(CPF, $alignmentName.".GENE.".$count))
{
close CPF;
print "PARTITION: ".$count."\n";
#print "perl ProteinModelSelection.pl ".$alignmentName.".GENE.".$count;
system("perl ProteinModelSelection.pl ".$alignmentName.".GENE.".$count);
$count = $count + 1;
}
}
else
{
#print "Determining AA model data\n";
#print "Computing randomized stepwise addition starting tree number :".$i."\n";
$cmd = $raxmlExecutable." -y -p 12345 -m PROTCATJTT -s ".$alignmentName." -n ST_".$alignmentName." \> ST_".$alignmentName."_out";
system($cmd);
$numberOfModels = @AA_Models;
for($i = 0; $i < $numberOfModels; $i++)
{
$aa = "PROTGAMMA".$AA_Models[$i];
$cmd = $raxmlExecutable." -f e -m ".$aa." -s ".$alignmentName." -t RAxML_parsimonyTree.ST_".$alignmentName." -n ".$AA_Models[$i]."_".$alignmentName."_EVAL \> ".$AA_Models[$i]."_".$alignmentName."_EVAL.out\n";
#print($cmd);
system($cmd);
}
for($i = 0; $i < $numberOfModels; $i++)
{
$logFileName = "RAxML_log.".$AA_Models[$i]."_".$alignmentName."_EVAL";
#print $logFileName."\n";
$lh[$i] = getLH($logFileName);
}
$bestLH = $UNLIKELY;
$bestI = -1;
for($i = 0; $i < $numberOfModels; $i++)
{
#print "Model: ".$AA_Models[$i]." LH: ". $lh[$i]."\n";
if($lh[$i] > $bestLH)
{
$bestLH = $lh[$i];
$bestI = $i;
}
}
print "Best Model : ".$AA_Models[$bestI]."\n\n";
$bestModel = $AA_Models[$bestI];
}