-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #8 from andrewjpage/master
try to fix broken input fastas and more overall tests
- Loading branch information
Showing
33 changed files
with
575 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
package MLST::Exceptions;

# Central declaration of the exception classes used by the MLST modules.
use strict;
use warnings;

# Exception::Class creates one exception class per name listed here.
# The class name is quoted explicitly instead of relying on bareword
# handling to the left of '=>'. The spelling "FileDoestExist" is the public
# class name and is kept unchanged so that existing callers keep working.
use Exception::Class (
    'MLST::Exceptions::FileDoestExist' => { description => 'File doesnt exist' },
);

1;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
=head1 NAME | ||
NormaliseFasta - Take in a Fasta file, check for invalid characters and build a corrected file if needed. This is needed for NCBI makeblastdb, which doesn't like the pipe character in the sequence name. | ||
=head1 SYNOPSIS | ||
use MLST::NormaliseFasta; | ||
my $output_fasta = MLST::NormaliseFasta->new( | ||
fasta_filename => 'Filename.fasta', | ||
working_directory => '/path/to/working/directory' | ||
); | ||
$output_fasta->processed_fasta_filename(); | ||
=cut | ||
|
||
package MLST::NormaliseFasta; | ||
use Moose; | ||
use Bio::SeqIO; | ||
use File::Basename; | ||
use MLST::Types; | ||
|
||
# Input FASTA file; constrained to the MLST::File type.
has 'fasta_filename' => (
  is       => 'ro',
  isa      => 'MLST::File',
  required => 1,
);

# Directory into which a rewritten copy of the input is placed when needed.
has 'working_directory' => (
  is       => 'ro',
  isa      => 'Str',
  required => 1,
);

# Name of the file to use downstream; computed on first access by the builder.
has '_normalised_fasta_filename' => (
  is      => 'ro',
  isa     => 'Str',
  lazy    => 1,
  builder => '_build__normalised_fasta_filename',
);
|
||
# Builder for the lazy '_normalised_fasta_filename' attribute.
# Reads the input FASTA one sequence at a time. As soon as a sequence id
# containing a pipe character is seen, the file is rewritten through
# _rename_sequences() and the name of the rewritten file is returned.
# When no id contains a pipe, the original filename is returned unchanged.
sub _build__normalised_fasta_filename
{
  my ($self) = @_;
  my $reader = Bio::SeqIO->new( -file => $self->fasta_filename, -format => 'Fasta' );

  while ( my $record = $reader->next_seq() )
  {
    # Ids without a pipe need no action; keep scanning.
    next unless $record->id =~ m/\|/;
    return $self->_rename_sequences();
  }

  return $self->fasta_filename;
}
|
||
# Write a copy of the input FASTA into the working directory in which every
# sequence id is replaced by a running number (1, 2, 3, ...).
# Returns the path of the rewritten file:
#   <working_directory>/<basename of fasta_filename>
# NOTE(review): if working_directory is the directory that holds the input
# file, the output path equals the input path and the input would be
# truncated while it is still being read - confirm callers always pass a
# separate (temporary) directory.
sub _rename_sequences
{
  my ($self) = @_;

  my $in_fasta_obj = Bio::SeqIO->new( -file => $self->fasta_filename, -format => 'Fasta' );

  # Without suffix patterns fileparse() returns the complete basename
  # (extension included) as its first value, so nothing else is needed.
  my ($basename) = fileparse( $self->fasta_filename );
  my $output_filename = $self->working_directory . '/' . $basename;

  # The stream is only written to, so plain write mode is sufficient.
  my $out_fasta_obj = Bio::SeqIO->new( -file => '>' . $output_filename, -format => 'Fasta' );

  my $counter = 1;
  while ( my $seq = $in_fasta_obj->next_seq() )
  {
    $seq->id("$counter");
    $out_fasta_obj->write_seq($seq);
    $counter++;
  }

  # Close explicitly so all buffered output is on disk before the path is
  # handed back to the caller.
  $out_fasta_obj->close();
  $in_fasta_obj->close();

  return $output_filename;
}
|
||
# Public accessor for the FASTA file that should be used downstream.
# Delegates to the lazily built '_normalised_fasta_filename' attribute.
sub processed_fasta_filename
{
  my $self = shift;
  my $fasta_path = $self->_normalised_fasta_filename;
  return $fasta_path;
}
|
||
no Moose; | ||
__PACKAGE__->meta->make_immutable; | ||
1; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
=head1 NAME | ||
File - Does a file exist? | ||
=head1 SYNOPSIS | ||
=cut | ||
|
||
package MLST::Validate::File; | ||
use Moose; | ||
|
||
# Report whether a path exists on the filesystem.
#   $file - path to check; an undefined value counts as "does not exist".
# Returns 1 when -e succeeds for the path, otherwise 0.
sub does_file_exist
{
  my ($self, $file) = @_;

  # Guard against undef so that -e does not emit an
  # "uninitialized value" warning.
  return 0 unless defined $file;

  return ( -e $file ) ? 1 : 0;
}
|
||
no Moose; | ||
__PACKAGE__->meta->make_immutable; | ||
1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#!/usr/bin/env perl | ||
use strict; | ||
use warnings; | ||
use File::Temp; | ||
use Bio::SeqIO; | ||
|
||
BEGIN { unshift(@INC, './modules') } | ||
BEGIN { | ||
use Test::Most; | ||
use_ok('MLST::NormaliseFasta'); | ||
} | ||
|
||
# Scratch directory that receives any rewritten FASTA file; it is removed
# automatically when the File::Temp object goes out of scope.
my $tmpdirectory_obj = File::Temp->newdir(CLEANUP => 1);
my $tmpdirectory = $tmpdirectory_obj->dirname();

# Scenario 1: no pipe characters in the sequence names, so the original
# file must be handed back untouched.
ok((my $output_fasta = MLST::NormaliseFasta->new(
  fasta_filename    => 't/data/contigs.fa',
  working_directory => $tmpdirectory
)),'Initalise file wihtout pipe characters in sequence names');
is($output_fasta->processed_fasta_filename(),'t/data/contigs.fa', 'file without pipe characters shouldnt change at all');

# Scenario 2: pipe characters in the sequence names, so a renumbered copy
# must be written into the working directory.
ok(($output_fasta = MLST::NormaliseFasta->new(
  fasta_filename    => 't/data/contigs_pipe_character_in_seq_name.fa',
  working_directory => $tmpdirectory
)),'Initalise file with pipe characters in sequence names');
is($output_fasta->processed_fasta_filename(), $tmpdirectory.'/contigs_pipe_character_in_seq_name.fa', 'file with pipe characters should be rewritten into the working directory');

# The rewritten copy must carry the running numbers 1, 2, 3 as sequence ids.
ok((my $in_fasta_obj = Bio::SeqIO->new( -file => $tmpdirectory.'/contigs_pipe_character_in_seq_name.fa' , -format => 'Fasta')), 'Open temp fasta file');
is($in_fasta_obj->next_seq()->id, '1', 'seq name now 1');
is($in_fasta_obj->next_seq()->id, '2', 'seq name now 2');
is($in_fasta_obj->next_seq()->id, '3', 'seq name now 3');
done_testing();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
#!/usr/bin/env perl | ||
use strict; | ||
use warnings; | ||
use File::Temp; | ||
|
||
BEGIN { unshift(@INC, './modules') } | ||
BEGIN { | ||
use Test::Most; | ||
use_ok('MLST::MultipleFastas'); | ||
} | ||
|
||
# Constructor options that are identical in every scenario below; each
# scenario only adds its own input files and a fresh output directory.
my %shared_options = (
  species              => "E.coli",
  base_directory       => 't/data',
  makeblastdb_exec     => 'makeblastdb',
  blastn_exec          => 'blastn',
  output_fasta_files   => 1,
  spreadsheet_basename => 'mlst_results',
  parallel_processes   => 1,
);

# Scenario 1: a single valid fasta.
my $tmpdirectory_obj = File::Temp->newdir(CLEANUP => 1);
my $tmpdirectory = $tmpdirectory_obj->dirname();

my $multiple_fastas = MLST::MultipleFastas->new(
  %shared_options,
  raw_input_fasta_files => ['t/data/contigs.fa'],
  output_directory      => $tmpdirectory,
);
ok($multiple_fastas, 'Initialise single valid fasta');
ok($multiple_fastas->create_result_files, 'create all the results files for a single valid fasta');
compare_files('t/data/expected_mlst_results.genomic.csv', $tmpdirectory.'/mlst_results.genomic.csv');
compare_files('t/data/expected_mlst_results.allele.csv', $tmpdirectory.'/mlst_results.allele.csv');
compare_files('t/data/expected_concatenated_alleles.fa', $tmpdirectory.'/concatenated_alleles.fa');

# Scenario 2: two fastas, one of them with pipe characters in its sequence names.
$tmpdirectory_obj = File::Temp->newdir(CLEANUP => 1);
$tmpdirectory = $tmpdirectory_obj->dirname();

$multiple_fastas = MLST::MultipleFastas->new(
  %shared_options,
  raw_input_fasta_files => ['t/data/contigs.fa','t/data/contigs_pipe_character_in_seq_name.fa'],
  output_directory      => $tmpdirectory,
);
ok($multiple_fastas, 'Initialise 2 files, one with pipe char and no hits');
ok($multiple_fastas->create_result_files, 'create all the results files for two fastas');
compare_files('t/data/expected_two_mlst_results.genomic.csv', $tmpdirectory.'/mlst_results.genomic.csv');
compare_files('t/data/expected_two_mlst_results.allele.csv', $tmpdirectory.'/mlst_results.allele.csv');
compare_files('t/data/expected_two_concatenated_alleles.fa', $tmpdirectory.'/concatenated_alleles.fa');

# Scenario 3: three fastas; per-allele fasta files are compared as well.
$tmpdirectory_obj = File::Temp->newdir(CLEANUP => 1);
$tmpdirectory = $tmpdirectory_obj->dirname();

$multiple_fastas = MLST::MultipleFastas->new(
  %shared_options,
  raw_input_fasta_files => ['t/data/contigs.fa','t/data/contigs_pipe_character_in_seq_name.fa','t/data/contigs_one_unknown.tfa'],
  output_directory      => $tmpdirectory,
);
ok($multiple_fastas, 'Initialise 3 files where 1 has near matches');
ok($multiple_fastas->create_result_files, 'create all the results files for three fastas');
compare_files('t/data/expected_three_mlst_results.genomic.csv', $tmpdirectory.'/mlst_results.genomic.csv');
compare_files('t/data/expected_three_mlst_results.allele.csv', $tmpdirectory.'/mlst_results.allele.csv');
compare_files('t/data/expected_three_concatenated_alleles.fa', $tmpdirectory.'/concatenated_alleles.fa');
compare_files('t/data/expected_three_contigs_one_unknown.unknown_allele.adk-2.fa', $tmpdirectory.'/contigs_one_unknown.unknown_allele.adk-2.fa');
compare_files('t/data/expected_three_contigs_one_unknown.unknown_allele.recA-1.fa', $tmpdirectory.'/contigs_one_unknown.unknown_allele.recA-1.fa');

done_testing();
|
||
# Assert that a generated results file exists and that its complete content
# is identical to the expected reference file.
#   $expected_file - path of the reference file
#   $actual_file   - path of the file produced by the code under test
# Emits two test results (existence, content) and returns the outcome of the
# content check.
sub compare_files
{
  my ($expected_file, $actual_file) = @_;
  ok((-e $actual_file),' results file exist');

  my $expected_content = _slurp_file($expected_file);
  my $actual_content   = _slurp_file($actual_file);

  # An unreadable file must never count as a match (undef would otherwise
  # compare equal to undef).
  if (!defined $expected_content || !defined $actual_content)
  {
    return fail('Content matches expected');
  }

  # is() takes the value under test first and the expected value second, so
  # failure diagnostics label "got" and "expected" correctly.
  return is($actual_content, $expected_content, 'Content matches expected');
}

# Read a whole file into a single string.
# Returns undef (after a diagnostic message) when the file cannot be opened.
sub _slurp_file
{
  my ($path) = @_;

  my $fh;
  if (!open($fh, '<', $path))
  {
    diag("Cannot open $path: $!");
    return;
  }

  # Slurp mode is limited to this sub by 'local'.
  local $/ = undef;
  my $content = <$fh>;
  close($fh);

  return $content;
}
Oops, something went wrong.