diff --git a/Dimob.pl b/Dimob.pl index 52ef9cc..8819c2b 100755 --- a/Dimob.pl +++ b/Dimob.pl @@ -22,7 +22,7 @@ =head1 AUTHORS =head1 LAST MAINTAINED - December 16th, 2016 + January 16th, 2019 =cut @@ -138,12 +138,30 @@ =head1 LAST MAINTAINED my $i = 1; open my $fhgd, '>', $outputfile or die "Cannot open output.txt: $!"; - foreach my $island (@islands) { - my $start = $island->[0]; - my $end = $island->[1]; - print $fhgd "GI_$i\t$start\t$end\n"; - $i++; + + if ($outputfile =~ /\.txt$/) { + #legacy output + $logger->info("Warning: txt output is now depreciated. Support has been added to output GFF3 formatted documents. Use (any) other extension to enable GFF output. See: https://github.com/brinkmanlab/islandpath/issues/7"); + foreach my $island (@islands) { + my $start = $island->[1]; + my $end = $island->[2]; + print $fhgd "GI_$i\t$start\t$end\n"; + $i++; + } + } else { + #GFF output + print $fhgd "##gff-version 3\n"; + foreach my $island (@islands) { + my $label = $island->[0]; + my $start = $island->[1]; + my $end = $island->[2]; + my $strand = $island->[3]; + #TODO use proper chromosome sequence id + print $fhgd "$label\tislandpath\tgenomic_island\t$start\t$end\t.\t$strand\t.\tID=$label\_gi$i\n"; + $i++; + } } + close $fhgd; $logger->info("Removing tmp files"); diff --git a/lib/Dimob.pm b/lib/Dimob.pm index 423f463..5b4bede 100644 --- a/lib/Dimob.pm +++ b/lib/Dimob.pm @@ -224,7 +224,7 @@ sub run_dimob { next; } - push (@gis, [ $_->[0]{start}, $_->[-1]{end}]); + push (@gis, [ $_->[0]{seq_id}, $_->[0]{start}, $_->[-1]{end}, $_->[0]{strand}]); #my $start = $_->[0]{start}; #my $end = $_->[-1]{end}; #print "$start\t$end\n"; diff --git a/lib/Dimob/genomicislands.pm b/lib/Dimob/genomicislands.pm index 8f5abf7..2028b8f 100644 --- a/lib/Dimob/genomicislands.pm +++ b/lib/Dimob/genomicislands.pm @@ -546,10 +546,15 @@ sub defline2gi { my $orf_start; my $orf_end; my $pid; + my $seq_id; + my $strand = '.'; # print "$orf1\n"; - if ( $orf1 =~ /\|:(\d+)\.\.(\d+)\)/ ) { - $orf_start = $1; - $orf_end = $2; + + if ( $orf1 =~ /.*\|([^:]*):(\d+)\.\.(\d+)\)/ ) { + $seq_id = $1; + $strand = '+'; + $orf_start = $2; + $orf_end = $3; my $coordinate = "$orf_start..$orf_end"; $pid = $ptt_table_hashref->{$coordinate}->{'PID'}; if($extended_ids) { @@ -559,9 +564,11 @@ sub defline2gi { unless(defined($pid)){ #warn "Could not find pid"; } - }elsif ( $orf1 =~ /\|:c(\d+)\.\.(\d+)\)/ ) { - $orf_start = $1; - $orf_end = $2; + }elsif ( $orf1 =~ /.*\|([^:]*):c(\d+)\.\.(\d+)\)/ ) { + $seq_id = $1; + $strand = '-'; + $orf_start = $2; + $orf_end = $3; my $coordinate = "$orf_start..$orf_end"; $pid = $ptt_table_hashref->{$coordinate}->{'PID'}; if($extended_ids) { @@ -582,6 +589,8 @@ sub defline2gi { #print "$orf_start and $orf_end\n"; $orf_index->{'ORF_label'} = $pid; + $orf_index->{'seq_id'} = $seq_id; + $orf_index->{'strand'} = $strand; $orf_index->{'start'}=$orf_start; $orf_index->{'end'}=$orf_end; push @result_orfs, $orf_index; diff --git a/lib/GenomeUtils.pm b/lib/GenomeUtils.pm index b213cc1..cdd6da0 100644 --- a/lib/GenomeUtils.pm +++ b/lib/GenomeUtils.pm @@ -344,6 +344,8 @@ sub read_and_convert { # open all files my $total_length = $seq->length(); + my $seq_id = $seq->object_id(); + # my $total_seq = $seq->seq(); #Create fna file @@ -418,7 +420,7 @@ sub read_and_convert { my $strand_expand = $strand >= 0 ? '+' : '-'; my $strand_expand2 = $strand >= 0 ? '' : 'c'; - my $desc = "\:$strand_expand2" . "$start..$end"; + my $desc = "$seq_id\:$strand_expand2" . "$start..$end"; $desc = "ref\|$ref_accnum\|gi\|$gi\|" . $desc;