-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathblastdb_make.pl
64 lines (58 loc) · 1.63 KB
/
blastdb_make.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/perl
use strict;
use File::Copy;
use File::Find::Object::Rule;
# ---------------------------------------------------------------------------
# Command-line handling and main program flow.
#
# $input holds the argument that follows "-f" (a file name, or the keyword
# 'all'). $prot is "T" when "-prot" was given and "F" otherwise; it is read
# by format_seq_files() to choose between a protein and a nucleotide database.
# ---------------------------------------------------------------------------
my $input = "";
my $prot = "F";

# Called without any arguments: print the usage text and stop.
if (!@ARGV) {
    print "\n";
    print "This script creates BLAST databases for all Fasta (*.fas) files in the current directory. It expects the file names to be in the form: \"genus-name_species-name.fas\" and will truncate the resulting database name to the first letter of the genus and first 3 letters of the species name.\n";
    print "\nSyntax:\n";
    print " -f X use file X as input or 'all' to parse all files in the current dir\n";
    print " -prot use this switch if protein sequences are used\n";
    print "\n";
    exit;
}

# Switches must match the whole argument (one or two leading dashes are
# accepted). An unanchored match would treat any argument that merely
# contains "-f" or "-prot" as a switch; e.g. the file name "my-file.fas"
# would be taken for "-f" and overwrite $input with the argument after it.
foreach my $argnum (0 .. $#ARGV) {
    my $arg = $ARGV[$argnum];
    if ($arg =~ /\A--?f\z/) {
        # "-f" may be the last argument; keep $input defined in that case.
        my $value = $ARGV[$argnum + 1];
        $input = defined $value ? $value : "";
    }
    elsif ($arg =~ /\A--?prot\z/) {
        $prot = "T";
    }
}

if ($input eq "all") {
    # Only *.fas files directly inside the current directory (no recursion).
    my $ffor = File::Find::Object::Rule->file()->name("*.fas");
    $ffor->maxdepth(1);
    my @filelist = $ffor->in(".");
    foreach my $file (@filelist) {
        format_seq_files($file);
    }
}
elsif ($input ne "" && -e $input) {
    format_seq_files($input);
}
else {
    # Reached both when no file was given and when the given file does not
    # exist, so a mistyped file name no longer ends the script silently.
    print "\nNo file specified or file does not exists. Pass a filename after the \"-f\" parameter (or 'all' to use all files in the current dir)\n";
}
exit();
# Build a BLAST database from one FASTA file with makeblastdb and copy the
# input file to "<database name>.fas" in the current directory.
#
# The database name is made of the first character of the file name plus the
# (up to) three characters following the first underscore, with the first
# letter upper-cased, followed by "_aa" when the file-level $prot is "T"
# (protein database) or "_nn" otherwise (nucleotide database).
#
# Parameters:
#   $_[0]  path of the FASTA file, expected to be named like
#          "genus-name_species-name.fas"
#
# Files whose name does not have that form, and files for which makeblastdb
# fails, are reported with a warning and skipped. Returns nothing.
sub format_seq_files {
    my ($file) = @_;
    print "$file\n";

    # Derive the abbreviation from the file name only, so that a leading
    # directory part ("./", "some/dir/") does not end up in the name.
    require File::Basename;
    my $name = File::Basename::basename($file);

    # index() returns -1 without an underscore; the species part would then
    # silently be taken from the start of the name. A leading underscore
    # leaves no genus letter. Both cases are rejected.
    my $sep = index($name, "_");
    if ($sep < 1) {
        warn "Skipping '$file': file name is not of the form genus_species.fas\n";
        return;
    }

    my $spec = ucfirst(substr($name, 0, 1) . substr($name, $sep + 1, 3));
    my $dbtype = "nucl";
    if ($prot eq "T") {
        $spec .= "_aa";
        $dbtype = "prot";
    }
    else {
        $spec .= "_nn";
    }

    # List form of system() bypasses the shell, so spaces or shell
    # metacharacters in the file name cannot break or alter the command.
    my @run_dbf = (
        "makeblastdb",
        "-in",     $file,
        "-title",  $spec,
        "-dbtype", $dbtype,
        "-out",    $spec,
    );
    my $status = system(@run_dbf);
    if ($status != 0) {
        warn "makeblastdb failed for '$file' (system() returned $status)\n";
        return;
    }

    copy($file, "$spec.fas")
        or warn "Could not copy '$file' to '$spec.fas': $!\n";
    return;
}