-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgi_to_taxid.pm
executable file
·88 lines (71 loc) · 1.85 KB
/
gi_to_taxid.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#
# The functions in this package return NCBI Taxids for NCBI GIs
#
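# Depends on the DBI module and on a local MySQL copy of the NCBI taxonomy db
# (see the DBI->connect call below).
# NOTE(review): this header used to list sorted_file_search_by_field.pm as a
# dependency, but nothing in this file loads it - confirm it is no longer needed.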
#
# Mark Stenglein, June 7, 2011
#
package gi_to_taxid;
use DBI;
use strict;
use base 'Exporter';
our @EXPORT = qw(gi_to_taxid);
# Open the package-wide handle to the local MySQL NCBI_Taxonomy database.
# This is a file-scope statement, so it runs when the package is loaded.
# RaiseError is switched on for the handle; a false return from connect is
# additionally reported through $DBI::errstr.
my $dbh = DBI->connect(
   'DBI:mysql:database=NCBI_Taxonomy',    # data source
   'NCBI_Taxonomy',                       # user name
   'NCBI_Taxonomy',                       # password
   { RaiseError => 1 },
) or die $DBI::errstr;
#
# gi_to_taxid(@gis)
#
# Exported entry point of this package.  Passes its argument list unchanged
# to _gi_to_taxid and hands the resulting list back to the caller.
#
sub gi_to_taxid
{
   my @taxids_for_gis = _gi_to_taxid(@_);

   return @taxids_for_gis;
}
#
# _gi_to_taxid(@gis)
#
# Looks up the taxid for every GI in the gi_taxid_map table, using a single
# query on the package-level database handle ($dbh).
#
# Arguments: a list of GIs.  An element containing the gi|XXXXX| form (as in
#            NCBI BLAST results) is reduced to the digits between the pipes
#            before the lookup; any other element is used as given.
#
# Returns:   a list that is 1:1 with the input list.  Element i is the taxid
#            fetched for input GI i, or undef if the query returned no row
#            for that GI.  An empty input list yields an empty list and no
#            query is run.
#
sub _gi_to_taxid
{
   my @gis    = @_;
   my @taxids = ();

   # Nothing to look up: return before building the query.  Otherwise the
   # statement "in ( ? )" would be executed with no bind values at all,
   # which is at best a wasted query and at worst a driver error.
   return @taxids unless @gis;

   # strip GIs given in gi|XXXXX| format down to the bare number
   # (@gis is our own copy, so the caller's values are not touched)
   foreach my $gi (@gis)
   {
      if ($gi =~ /gi\|(\d+)\|/)
      {
         $gi = $1;
      }
   }

   # one placeholder per GI, so the values are bound rather than
   # interpolated into the SQL text
   my $qs = join(", ", ("?") x scalar(@gis));

   my $sql_string = "SELECT gi, taxid FROM gi_taxid_map where gi in ( $qs )";
   # NOTE(review): this prints the statement to STDERR on every call; the
   # other diagnostics in this file were commented out - confirm it is wanted
   warn "$sql_string\n";

   my $sth = $dbh->prepare( $sql_string );
   $sth->execute(@gis);

   # collect the rows into a hash first, because:
   #
   # (1) some GIs might not return TAXIDs
   # (2) results not necessarily in order of input
   #
   my %gi_taxid_map = ();
   while ( my ($gi, $taxid) = $sth->fetchrow_array())
   {
      $gi_taxid_map{$gi} = $taxid;
   }

   # build the return list in input order; GIs without a row map to undef
   @taxids = map { $gi_taxid_map{$_} } @gis;

   return @taxids;
}
# Perl packages must return a true value
1;