diff --git a/get_sequence_type b/get_sequence_type index 0e58121..a6c2640 100755 --- a/get_sequence_type +++ b/get_sequence_type @@ -22,6 +22,7 @@ path-help@sanger.ac.uk package SequenceType::Main; BEGIN { unshift(@INC, './modules') } +use lib "/software/pathogen/internal/prod/lib"; use Moose; use Getopt::Long; use Cwd; @@ -102,5 +103,3 @@ else ); $multiple_fastas->create_result_files; } - -# list all available MLST databases diff --git a/modules/MLST/CompareAlleles.pm b/modules/MLST/CompareAlleles.pm index 415167c..84e313e 100644 --- a/modules/MLST/CompareAlleles.pm +++ b/modules/MLST/CompareAlleles.pm @@ -21,8 +21,9 @@ use File::Basename; use Bio::SeqIO; use MLST::Blast::Database; use MLST::Blast::BlastN; +use MLST::Types; -has 'sequence_filename' => ( is => 'ro', isa => 'Str', required => 1 ); +has 'sequence_filename' => ( is => 'ro', isa => 'MLST::File', required => 1 ); has 'allele_filenames' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); has 'makeblastdb_exec' => ( is => 'ro', isa => 'Str', default => 'makeblastdb' ); has 'blastn_exec' => ( is => 'ro', isa => 'Str', default => 'blastn' ); diff --git a/modules/MLST/Exceptions.pm b/modules/MLST/Exceptions.pm new file mode 100644 index 0000000..d3d6c96 --- /dev/null +++ b/modules/MLST/Exceptions.pm @@ -0,0 +1,7 @@ +package MLST::Exceptions; + +use Exception::Class ( + MLST::Exceptions::FileDoestExist => { description => 'File doesnt exist' }, +); + +1; diff --git a/modules/MLST/MultipleFastas.pm b/modules/MLST/MultipleFastas.pm index 5205ec3..b68a6a8 100644 --- a/modules/MLST/MultipleFastas.pm +++ b/modules/MLST/MultipleFastas.pm @@ -22,6 +22,8 @@ use Moose; use Parallel::ForkManager; use MLST::ProcessFasta; use MLST::Spreadsheet::File; +use MLST::NormaliseFasta; +use File::Temp; has 'species' => ( is => 'ro', isa => 'Str', required => 1 ); has 'base_directory' => ( is => 'ro', isa => 'Str', required => 1 ); @@ -41,6 +43,7 @@ has '_input_fasta_files' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, build has '_concat_names' => ( is => 'rw', isa => 'ArrayRef', default => sub {[]} ); has '_concat_sequences' => ( is => 'rw', isa => 'ArrayRef', default => sub {[]} ); +has '_working_directory' => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir(CLEANUP => 1); }); sub _generate_spreadsheet_rows { @@ -93,18 +96,18 @@ sub _generate_spreadsheet_rows sub _build__input_fasta_files { my($self) = @_; - # TODO: Validate and Reformat the fasta files if theres a pipe character - - # Validate + my @normalised_fasta_files; + for my $fastafile (@{$self->raw_input_fasta_files}) { - if(!(-e $fastafile )) - { - die "Input file doesnt exist: $fastafile\n"; - } + my $output_fasta_obj = MLST::NormaliseFasta->new( + fasta_filename => $fastafile, + working_directory => $self->_working_directory->dirname() + ); + push(@normalised_fasta_files,$output_fasta_obj->processed_fasta_filename()); } - return $self->raw_input_fasta_files; + return \@normalised_fasta_files; } sub create_result_files diff --git a/modules/MLST/NormaliseFasta.pm b/modules/MLST/NormaliseFasta.pm new file mode 100644 index 0000000..73c7fe4 --- /dev/null +++ b/modules/MLST/NormaliseFasta.pm @@ -0,0 +1,71 @@ +=head1 NAME + +NormaliseFasta - Take in a Fasta file, check for invalid characters and build a corrected file if needed. This is needed for NCBI makeblastdb which doesnt like the pipe character in the sequence name + +=head1 SYNOPSIS + +use MLST::NormaliseFasta; + +my $output_fasta = MLST::NormaliseFasta->new( + fasta_filename => 'Filename.fasta' + +); +$output_fasta->processed_fasta_filename(); + +=cut + +package MLST::NormaliseFasta; +use Moose; +use Bio::SeqIO; +use File::Basename; +use MLST::Types; + +has 'fasta_filename' => ( is => 'ro', isa => 'MLST::File', required => 1 ); +has 'working_directory' => ( is => 'ro', isa => 'Str', required => 1 ); + +has '_normalised_fasta_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__normalised_fasta_filename' ); + +sub _build__normalised_fasta_filename +{ + my($self) = @_; + my $fasta_obj = Bio::SeqIO->new( -file => $self->fasta_filename , -format => 'Fasta'); + + while(my $seq = $fasta_obj->next_seq()) + { + if($seq->id =~ m/\|/ ) + { + return $self->_rename_sequences(); + } + } + + return $self->fasta_filename; +} + +sub _rename_sequences +{ + my($self) = @_; + my $in_fasta_obj = Bio::SeqIO->new( -file => $self->fasta_filename , -format => 'Fasta'); + my($filename, $directories, $suffix) = fileparse($self->fasta_filename); + my $output_filename = $self->working_directory.'/'.$filename.$suffix ; + my $out_fasta_obj = Bio::SeqIO->new(-file => "+>".$output_filename , -format => 'Fasta'); + + my $counter = 1; + while(my $seq = $in_fasta_obj->next_seq()) + { + $seq->id($counter.""); + $out_fasta_obj->write_seq($seq); + $counter++; + } + return $output_filename; +} + +sub processed_fasta_filename +{ + my($self) = @_; + return $self->_normalised_fasta_filename; +} + +no Moose; +__PACKAGE__->meta->make_immutable; +1; + diff --git a/modules/MLST/OutputFasta.pm b/modules/MLST/OutputFasta.pm index b4d727a..e040564 100644 --- a/modules/MLST/OutputFasta.pm +++ b/modules/MLST/OutputFasta.pm @@ -22,11 +22,12 @@ use File::Basename; use File::Path qw(make_path); use Bio::PrimarySeq; use Bio::SeqIO; +use MLST::Types; has 'matching_sequences' => ( is => 'ro', isa => 'Maybe[HashRef]', required => 1 ); has 'non_matching_sequences' => ( is => 'ro', isa => 'Maybe[HashRef]', required => 1 ); has 'output_directory' => ( is => 'ro', isa => 'Str', required => 1 ); -has 'input_fasta_file' => ( is => 'ro', isa => 'Str', required => 1 ); +has 'input_fasta_file' => ( is => 'ro', isa => 'MLST::File', required => 1 ); has '_fasta_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__fasta_filename' ); has 'concat_sequence' => ( is => 'rw', isa => 'Maybe[Str]' ); diff --git a/modules/MLST/ProcessFasta.pm b/modules/MLST/ProcessFasta.pm index 885477d..b87befd 100644 --- a/modules/MLST/ProcessFasta.pm +++ b/modules/MLST/ProcessFasta.pm @@ -24,10 +24,11 @@ use MLST::CompareAlleles; use MLST::SequenceType; use MLST::OutputFasta; use MLST::Spreadsheet::Row; +use MLST::Types; has 'species' => ( is => 'ro', isa => 'Str', required => 1 ); has 'base_directory' => ( is => 'ro', isa => 'Str', required => 1 ); -has 'fasta_file' => ( is => 'ro', isa => 'Str', required => 1 ); +has 'fasta_file' => ( is => 'ro', isa => 'MLST::File', required => 1 ); has 'makeblastdb_exec' => ( is => 'ro', isa => 'Str', required => 1 ); has 'blastn_exec' => ( is => 'ro', isa => 'Str', required => 1 ); has 'output_directory' => ( is => 'ro', isa => 'Str', required => 1 ); diff --git a/modules/MLST/SearchForFiles.pm b/modules/MLST/SearchForFiles.pm index f20666f..11de4c3 100644 --- a/modules/MLST/SearchForFiles.pm +++ b/modules/MLST/SearchForFiles.pm @@ -17,11 +17,12 @@ $search_results->profiles_filename(); package MLST::SearchForFiles; use Moose; +use MLST::Types; has 'species_name' => ( is => 'ro', isa => 'Str', required => 1 ); has 'base_directory' => ( is => 'ro', isa => 'Str', required => 1 ); -has 'profiles_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build_profiles_filename'); +has 'profiles_filename' => ( is => 'ro', isa => 'MLST::File', lazy => 1, builder => '_build_profiles_filename'); has 'allele_filenames' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_allele_filenames'); has 'search_base_directory' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__search_base_directory'); diff --git a/modules/MLST/SequenceType.pm b/modules/MLST/SequenceType.pm index cd8f0cb..510987b 100644 --- a/modules/MLST/SequenceType.pm +++ b/modules/MLST/SequenceType.pm @@ -17,8 +17,9 @@ $st->sequence_type(); package MLST::SequenceType; use Moose; +use MLST::Types; -has 'profiles_filename' => ( is => 'ro', isa => 'Str', required => 1 ); +has 'profiles_filename' => ( is => 'ro', isa => 'MLST::File', required => 1 ); has 'sequence_names' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); has 'allele_to_number' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_allele_to_number' ); diff --git a/modules/MLST/Types.pm b/modules/MLST/Types.pm index 2321567..9028c17 100644 --- a/modules/MLST/Types.pm +++ b/modules/MLST/Types.pm @@ -2,11 +2,16 @@ package MLST::Types; use Moose; use Moose::Util::TypeConstraints; use MLST::Validate::Executable; +use MLST::Validate::File; subtype 'MLST::Executable', as 'Str', where { MLST::Validate::Executable->new()->does_executable_exist($_) }; +subtype 'MLST::File', + as 'Str', + where { MLST::Validate::File->new()->does_file_exist($_) }; + no Moose; no Moose::Util::TypeConstraints; __PACKAGE__->meta->make_immutable; diff --git a/modules/MLST/Validate/File.pm b/modules/MLST/Validate/File.pm new file mode 100644 index 0000000..e075e56 --- /dev/null +++ b/modules/MLST/Validate/File.pm @@ -0,0 +1,22 @@ +=head1 NAME + +File - Does a file exist? + +=head1 SYNOPSIS + +=cut + +package MLST::Validate::File; +use Moose; + +sub does_file_exist +{ + my($self, $file) = @_; + return 1 if(-e $file); + + return 0; +} + +no Moose; +__PACKAGE__->meta->make_immutable; +1; \ No newline at end of file diff --git a/t/Input/NormaliseFasta.t b/t/Input/NormaliseFasta.t new file mode 100644 index 0000000..282e902 --- /dev/null +++ b/t/Input/NormaliseFasta.t @@ -0,0 +1,32 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use File::Temp; +use Bio::SeqIO; + +BEGIN { unshift(@INC, './modules') } +BEGIN { + use Test::Most; + use_ok('MLST::NormaliseFasta'); +} + +my $tmpdirectory_obj = File::Temp->newdir(CLEANUP => 1); +my $tmpdirectory = $tmpdirectory_obj->dirname(); + +ok((my $output_fasta = MLST::NormaliseFasta->new( + fasta_filename => 't/data/contigs.fa', + working_directory => $tmpdirectory +)),'Initalise file wihtout pipe characters in sequence names'); +is($output_fasta->processed_fasta_filename(),'t/data/contigs.fa', 'file without pipe characters shouldnt change at all'); + + +ok(($output_fasta = MLST::NormaliseFasta->new( + fasta_filename => 't/data/contigs_pipe_character_in_seq_name.fa', + working_directory => $tmpdirectory +)),'Initalise file with pipe characters in filename'); +is($output_fasta->processed_fasta_filename(), $tmpdirectory.'/contigs_pipe_character_in_seq_name.fa', 'file without pipe characters shouldnt change at all'); +ok((my $in_fasta_obj = Bio::SeqIO->new( -file => $tmpdirectory.'/contigs_pipe_character_in_seq_name.fa' , -format => 'Fasta')), 'Open temp fasta file'); +is($in_fasta_obj->next_seq()->id, '1', 'seq name now 1'); +is($in_fasta_obj->next_seq()->id, '2', 'seq name now 2'); +is($in_fasta_obj->next_seq()->id, '3', 'seq name now 3'); +done_testing(); diff --git a/t/Output/MultipleFastas.t b/t/Output/MultipleFastas.t new file mode 100644 index 0000000..13e9449 --- /dev/null +++ b/t/Output/MultipleFastas.t @@ -0,0 +1,83 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use File::Temp; + +BEGIN { unshift(@INC, './modules') } +BEGIN { + use Test::Most; + use_ok('MLST::MultipleFastas'); +} + +my $tmpdirectory_obj = File::Temp->newdir(CLEANUP => 1); +my $tmpdirectory = $tmpdirectory_obj->dirname(); + +ok((my $multiple_fastas = MLST::MultipleFastas->new( + species => "E.coli", + base_directory => 't/data', + raw_input_fasta_files => ['t/data/contigs.fa'], + makeblastdb_exec => 'makeblastdb', + blastn_exec => 'blastn', + output_directory => $tmpdirectory, + output_fasta_files => 1, + spreadsheet_basename => 'mlst_results', + parallel_processes => 1 +)),'Initialise single valid fasta'); +ok(($multiple_fastas->create_result_files),'create all the results files for a single valid fasta'); +compare_files('t/data/expected_mlst_results.genomic.csv', $tmpdirectory.'/mlst_results.genomic.csv'); +compare_files('t/data/expected_mlst_results.allele.csv', $tmpdirectory.'/mlst_results.allele.csv'); +compare_files('t/data/expected_concatenated_alleles.fa', $tmpdirectory.'/concatenated_alleles.fa'); + +$tmpdirectory_obj = File::Temp->newdir(CLEANUP => 1); +$tmpdirectory = $tmpdirectory_obj->dirname(); +ok(($multiple_fastas = MLST::MultipleFastas->new( + species => "E.coli", + base_directory => 't/data', + raw_input_fasta_files => ['t/data/contigs.fa','t/data/contigs_pipe_character_in_seq_name.fa'], + makeblastdb_exec => 'makeblastdb', + blastn_exec => 'blastn', + output_directory => $tmpdirectory, + output_fasta_files => 1, + spreadsheet_basename => 'mlst_results', + parallel_processes => 1 +)),'Initialise 2 files, one with pipe char and no hits'); +ok(($multiple_fastas->create_result_files),'create all the results files for two fastas'); +compare_files('t/data/expected_two_mlst_results.genomic.csv', $tmpdirectory.'/mlst_results.genomic.csv'); +compare_files('t/data/expected_two_mlst_results.allele.csv', $tmpdirectory.'/mlst_results.allele.csv'); +compare_files('t/data/expected_two_concatenated_alleles.fa', $tmpdirectory.'/concatenated_alleles.fa'); + + +$tmpdirectory_obj = File::Temp->newdir(CLEANUP => 1); +$tmpdirectory = $tmpdirectory_obj->dirname(); +ok(($multiple_fastas = MLST::MultipleFastas->new( + species => "E.coli", + base_directory => 't/data', + raw_input_fasta_files => ['t/data/contigs.fa','t/data/contigs_pipe_character_in_seq_name.fa','t/data/contigs_one_unknown.tfa'], + makeblastdb_exec => 'makeblastdb', + blastn_exec => 'blastn', + output_directory => $tmpdirectory, + output_fasta_files => 1, + spreadsheet_basename => 'mlst_results', + parallel_processes => 1 +)),'Initialise 3 files where 1 has near matches'); +ok(($multiple_fastas->create_result_files),'create all the results files for three fastas'); +compare_files('t/data/expected_three_mlst_results.genomic.csv', $tmpdirectory.'/mlst_results.genomic.csv'); +compare_files('t/data/expected_three_mlst_results.allele.csv', $tmpdirectory.'/mlst_results.allele.csv'); +compare_files('t/data/expected_three_concatenated_alleles.fa', $tmpdirectory.'/concatenated_alleles.fa'); +compare_files('t/data/expected_three_contigs_one_unknown.unknown_allele.adk-2.fa', $tmpdirectory.'/contigs_one_unknown.unknown_allele.adk-2.fa'); +compare_files('t/data/expected_three_contigs_one_unknown.unknown_allele.recA-1.fa', $tmpdirectory.'/contigs_one_unknown.unknown_allele.recA-1.fa'); + + +done_testing(); + +sub compare_files +{ + my($expected_file, $actual_file) = @_; + ok((-e $actual_file),' results file exist'); + local $/ = undef; + open(EXPECTED, $expected_file); + open(ACTUAL, $actual_file); + my $expected_line = <EXPECTED>; + my $actual_line = <ACTUAL>; + is($expected_line,$actual_line, 'Content matches expected'); +} diff --git a/t/Output/OutputFasta.t b/t/Output/OutputFasta.t index ed49230..1da9f8e 100644 --- a/t/Output/OutputFasta.t +++ b/t/Output/OutputFasta.t @@ -16,7 +16,7 @@ ok((my $output_fasta = MLST::OutputFasta->new( matching_sequences => {'adk-2' => "AAAA", 'purA-3' => "CCCC"}, non_matching_sequences => {}, output_directory => $tmpdirectory, - input_fasta_file => '/path/to/myfasta.fa' + input_fasta_file => 't/data/contigs.fa' )), "Initialise matching seq"); ok(($output_fasta->create_files()),'created output files'); @@ -26,10 +26,10 @@ ok(($output_fasta = MLST::OutputFasta->new( matching_sequences => {}, non_matching_sequences => {}, output_directory => $tmpdirectory, - input_fasta_file => '/path/to/myfasta.fa' + input_fasta_file => 't/data/contigs.fa' )), "Initialise no matching seq"); ok(($output_fasta->create_files()),'created output files'); -ok(!(-e $tmpdirectory."/myfasta.mlst_loci.fa"), 'No output files created'); +ok(!(-e $tmpdirectory."/contigs.mlst_loci.fa"), 'No output files created'); $tmpdirectory_obj = File::Temp->newdir(CLEANUP => 1); $tmpdirectory = $tmpdirectory_obj->dirname(); @@ -37,13 +37,13 @@ ok(($output_fasta = MLST::OutputFasta->new( matching_sequences => { 'purA-3' => "CCCC", 'adk-2' => "AAAA"}, non_matching_sequences => {'EEE' => "GGGG",'FFF' => "TTTT"}, output_directory => $tmpdirectory, - input_fasta_file => '/path/to/myfasta.fa' + input_fasta_file => 't/data/contigs.fa' )), "Initialise matching and non matching"); ok(($output_fasta->create_files()),'created output files'); -compare_file_content($tmpdirectory."/myfasta.unknown_allele.EEE.fa", '>EEE +compare_file_content($tmpdirectory."/contigs.unknown_allele.EEE.fa", '>EEE GGGG '); -compare_file_content($tmpdirectory."/myfasta.unknown_allele.FFF.fa", '>FFF +compare_file_content($tmpdirectory."/contigs.unknown_allele.FFF.fa", '>FFF TTTT '); @@ -53,11 +53,11 @@ ok(($output_fasta = MLST::OutputFasta->new( matching_sequences => { 'purA-3' => "CCCC", 'adk-2' => "AAAA"}, non_matching_sequences => {'EEE' => "NNNN",'FFF' => "TTTT"}, output_directory => $tmpdirectory, - input_fasta_file => '/path/to/myfasta.fa' + input_fasta_file => 't/data/contigs.fa' )), "Initialise non matching with an unknown sequence"); ok(($output_fasta->create_files()),'created output files'); -ok(!(-e $tmpdirectory."/myfasta.unknown_allele.EEE.fa"), 'No output files created for unknown loci'); -compare_file_content($tmpdirectory."/myfasta.unknown_allele.FFF.fa", '>FFF +ok(!(-e $tmpdirectory."/contigs.unknown_allele.EEE.fa"), 'No output files created for unknown loci'); +compare_file_content($tmpdirectory."/contigs.unknown_allele.FFF.fa", '>FFF TTTT '); @@ -67,13 +67,13 @@ ok(($output_fasta = MLST::OutputFasta->new( matching_sequences => { 'purA-3' => "CCCC", 'adk-2' => "AAAA"}, non_matching_sequences => {'EEE' => "GGNN",'FFF' => "TTTT"}, output_directory => $tmpdirectory, - input_fasta_file => '/path/to/myfasta.fa' + input_fasta_file => 't/data/contigs.fa' )), "Initialise non matching has a short sequence"); ok(($output_fasta->create_files()),'created output files'); -compare_file_content($tmpdirectory."/myfasta.unknown_allele.EEE.fa", '>EEE +compare_file_content($tmpdirectory."/contigs.unknown_allele.EEE.fa", '>EEE GGNN '); -compare_file_content($tmpdirectory."/myfasta.unknown_allele.FFF.fa", '>FFF +compare_file_content($tmpdirectory."/contigs.unknown_allele.FFF.fa", '>FFF TTTT '); diff --git a/t/SequenceTypes/SearchForFiles.t b/t/SequenceTypes/SearchForFiles.t index 83f5d97..6666f92 100644 --- a/t/SequenceTypes/SearchForFiles.t +++ b/t/SequenceTypes/SearchForFiles.t @@ -26,7 +26,7 @@ sub species_name_regex species_name => $regex, base_directory => 't/data' )),"initialise searching for files with $regex"); - is_deeply(['t/data/Escherichia_coli_1/alleles/aaa.tfa', 't/data/Escherichia_coli_1/alleles/bbb.tfa'],$search_results->allele_filenames(),"allele filenames for $regex"); + is_deeply(['t/data/Escherichia_coli_1/alleles/adk.tfa', 't/data/Escherichia_coli_1/alleles/purA.tfa','t/data/Escherichia_coli_1/alleles/recA.tfa'],$search_results->allele_filenames(),"allele filenames for $regex"); is('t/data/Escherichia_coli_1/profiles/escherichia_coli.txt', $search_results->profiles_filename(),"profiles filename for $regex"); } \ No newline at end of file diff --git a/t/data/Escherichia_coli_1/alleles/aaa.tfa b/t/data/Escherichia_coli_1/alleles/aaa.tfa deleted file mode 100644 index e69de29..0000000 diff --git a/t/data/Escherichia_coli_1/alleles/adk.tfa b/t/data/Escherichia_coli_1/alleles/adk.tfa new file mode 100644 index 0000000..d6a586b --- /dev/null +++ b/t/data/Escherichia_coli_1/alleles/adk.tfa @@ -0,0 +1,40 @@ +>adk-1 +GGGGAAAGGGACTCAGGCTCAGTTCATCATGGAGAAATATGGTATTCCGCAAATCTCCAC +TGGCGATATGCTGCGTGCTGCGGTCAAATCTGGCTCCGAGCTGGGTAAACAAGCAAAAGA +CATTATGGATGCTGGCAAACTGGTCACCGACGAACTGGTGATCGCGCTGGTTAAAGAGCG +CATTGCTCAGGAAGACTGCCGTAATGGTTTCCTGTTGGACGGCTTCCCGCGTACCATTCC +GCAGGCAGACGCGATGAAAGAAGCGGGCATCAATGTTGATTACGTTCTGGAATTCGACGT +ACCGGACGAACTGATTGTTGATCGTATCGTAGGCCGCCGCGTTCATGCGCCGTCTGGTCG +TGTTTATCACGTTAAATTCAATCCGCCGAAAGTAGAAGGCAAAGACGACGTTACCGGTGA +AGAACTGACTACCCGTAAAGACGATCAGGAAGAAACCGTACGTAAACGTCTGGTTGAATA +CCATCAGATGACTGCACCGCTGATCGGCTACTACTCCAAAGAAGCGGAAGCGGGTA +>adk-2 +GGGGAAAGGGACTCAGGCTCAGTTCATCATGGAGAAATATGGTATTCCGCAAATCTCCAC +TGGCGATATGCTGCGTGCTGCGGTCAAATCTGGCTCCGAGCTGGGTAAACAAGCAAAAGA +CATTATGGATGCTGGCAAACTGGTTACCGACGAACTGGTGATCGCGCTGGTTAAAGGGCG +CATTGCTCAGGAAGACTGCCGTAATGGTTTCCTGTTGGACGGCTTCCCGCGTACCATTCC +GCAGGCAGACGCGATGAAAGAAGCGGGCATCAATGTTGATTACGTTCTGGAATTCGACGT +ACCGGACGAACTGATCGTTGACCGTATCGTCGGTCGCCGCGTTCACGCGCCGTCTGGTCG +TGTTTATCACGTTAAATTCAATCCGCCGAAAGTAGAAGGTAAAGACGACGTTACCGGTGA +AGAACTGACTACCCGTAAAGACGATCAGGAAGAAACCGTACGTAAACGTCTGGTTGAATA +CCATCAGATGACAGCACCGCTGATCGGCTACTACTCCAAAGAAGCTGAAGCGGGTA +>adk-3 +GGGGAAAGGGACTCAGGCTCAGTTCATCATGGAGAAATATGGTATTCCGCAAATCTCCAC +TGGCGATATGCTGCGTGCTGCGGTCAAATCTGGCTCCGAGCTGGGTAAACAAGCAAAAGA +CATTATGGATGCTGGCAAACTGGTCACCGACGAACTGGTGATCGCGCTGGTTAAAGAGCG +CATTGCTCAGGAAGACTGCCGTAATGGTTTCCTGTTGGACGGCTTCCCGCGTACCATTCC +GCAGGCAGACGCGATGAAAGAAGCGGGCATCAATGTTGATTACGTTCTGGAATTCGACGT +ACCGGACGAACTGATTGTTGATCGTATCGTAGGCCGCCGCGTTCATGCGCCGTCTGGTCG +TGTTTATCACGTTAAATTCAATCCGCCGAAAGTAGAAGGCAAAGACGACGTTACCGGTGA +AGAACTGACTACCCGTAAAGACGATCAGGAAGAAACCGTGCGTAAACGTCTGGTTGAATA +CCATCAGATGACTGCACCGTTGATCGGCTACTACTCCAAAGAAGCGGAAGCGGGTA +>adk-4 +GGGGAAAGGGACTCAGGCTCAGTTCATCATGGAGAAATATGGTATTCCGCAAATCTCCAC +TGGCGATATGCTGCGTGCTGCGGTCAAATCTGGCTCCGAGCTGGGTAAACAAGCAAAAGA +CATTATGGATGCTGGCAAACTGGTTACCGACGAACTGGTGATCGCGCTGGTTAAAGAGCG +CATTGCTCAGGAAGACTGCCGTAATGGTTTCCTGTTGGACGGCTTCCCGCGTACCATTCC +GCAGGCAGACGCGATGAAAGAAGCGGGCATCAATGTTGATTACGTTCTGGAATTCGACGT +ACCGGACGAACTGATCGTTGACCGTATTGTCGGTCGCCGCGTTCACGCGCCGTCTGGTCG +TGTTTATCACGTTAAATTCAACCCGCCGAAAGTAGAAGGCAAAGACGACGTTACCGGTGA +AGAACTGACTACCCGTAAAGACGATCAGGAAGAAACCGTACGTAAACGTCTGGTTGAATA +CCATCAGATGACTGCACCGCTGATCGGCTACTACTCCAAAGAAGCGGAAGCGGGTA \ No newline at end of file diff --git a/t/data/Escherichia_coli_1/alleles/bbb.tfa b/t/data/Escherichia_coli_1/alleles/bbb.tfa deleted file mode 100644 index e69de29..0000000 diff --git a/t/data/Escherichia_coli_1/alleles/purA.tfa b/t/data/Escherichia_coli_1/alleles/purA.tfa new file mode 100644 index 0000000..eebc74f --- /dev/null +++ b/t/data/Escherichia_coli_1/alleles/purA.tfa @@ -0,0 +1,36 @@ +>purA-1 +ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCG +GGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACA +AAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTA +ACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTG +CCGACATCCTGACTTCTATGGTGGTTGACGTTTCTGACCTGCTCGACCAGGCGCGTCAGC +GTGGCGATTTCGTCATGTTTGAAGGTGCGCAGGGTACGCTGCTGGATATCGACCACGGTA +CTTATCCGTACGTAACTTCTTCCAACACCACTGCTGGTGGCGTGGCGACCGGTTCCGGCC +TGGGCCCGCGTTATGTTGATTACGTTCTGGGTATCCTCAAAGCTTACTCCACTCGTGT +>purA-2 +ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCG +GGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACA +AAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTA +ACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTG +CCGACATCCTGACTTCTATGGTGGTTGACGTTTCTGACCTGCTCGACCAGGCGCGTCAGC +GTGGCGATTTCGTCATGTTCGAAGGTGCGCAGGGTACGCTGCTGGATATCGACCACGGTA +CTTATCCGTACGTAACTTCTTCCAACACCACTGCTGGTGGCGTGGCGACCGGTTCCGGCC +TGGGCCCGCGTTATGTTGATTACGTTCTGGGTATCCTCAAAGCTTACTCCACTCGTGT +>purA-3 +ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCG +GGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACA +AAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTA +ACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTG +CCGACATCCTGACTTCTATGGTGGTTGACGTTTCTGACCTGCTCGACCAGGCGCGTCAGC +GTGGCGATTTCGTCATGTTCGAAGGTGCGCAGGGTACGCTGCTGGATATCGACCACGGTA +CTTATCCGTACGTAACTTCTTCCAACACCACTGCTGGTGGCGTGGCGACCGGTTCCGGCC +TGGGCCCGCGTTATGTTGATTACGTTCTGGGTATCCTCAAAGCTTACTCAACTCGTGT +>purA-4 +ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCG +GGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACA +AAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTA +ACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTG +CCGACATCCTGACTTCTATGGTGGTTGATGTTTCTGACCTGCTCGACCAGGCGCGTCAGC +GTGGCGATTTCGTCATGTTCGAAGGTGCTCAGGGTACGCTGCTGGATATCGACCACGGTA +CTTATCCGTACGTAACTTCTTCCAACACCACTGCTGGTGGCGTGGCGACCGGTTCCGGCC +TGGGCCCGCGTTATGTTGATTACGTTCTGGGTATCCTCAAAGCTTACTCCACTCGTGT \ No newline at end of file diff --git a/t/data/Escherichia_coli_1/alleles/recA.tfa b/t/data/Escherichia_coli_1/alleles/recA.tfa new file mode 100644 index 0000000..9a0031c --- /dev/null +++ b/t/data/Escherichia_coli_1/alleles/recA.tfa @@ -0,0 +1,90 @@ +>recA-1 +CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGA +GCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGT +TGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCA +CATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAA +GCAGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGAAAATTGGTGTGATGTT +CGGTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCT +CGACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCG +CGTGAAAGTGGTGAAGAACAAAATCGCTGCACCGTTTAAACAGGCTGAATTTCAGATCCT +CTACGGCGAAGGTATCAACTTCTACGGCGA +>recA-2 +CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGA +GCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGCGCAGTAGACGTTATCGTCGT +TGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCA +CATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAA +GCAGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGAAAATTGGTGTGATGTT +CGGTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCT +CGACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCG +CGTGAAAGTGGTGAAGAACAAAATCGCTGCGCCGTTTAAACAGGCTGAATTCCAGATCCT +CTACGGCGAAGGTATCAACTTCTACGGCGA +>recA-3 +CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGA +GCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGCGCAGTAGACGTTATCGTCGT +TGACTCCGTGGCGGCCCTGACGCCGAAAGCGGAAATCGAAGGTGAAATCGGCGACTCTCA +CATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAA +GCAGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGAAAATTGGTGTGATGTT +CGGTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCT +CGACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCG +CGTGAAAGTGGTGAAGAACAAAATCGCTGCGCCGTTTAAACAGGCTGAATTCCAGATCCT +CTACGGCGAAGGTATCAACTTCTACGGCGA +>recA-4 +TGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACTGGCGA +GCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGCGCAGTAGACGTTATCGTCGT +TGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCA +CATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAA +GCAGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGAAAATTGGTGTGATGTT +CGGTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCT +CGACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCG +TGTGAAAGTGGTGAAGAACAAAATCGCTGCGCCGTTTAAACAGGCTGAATTCCAGATCCT +CTACGGCGAAGGTATCAACTTCTATGGCGA +>recA-5 +CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGA +GCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGGGCAGTAGACGTTATCGTCGT +TGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCA +CATGGGCCTTGCGGCACGTATAATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAA +GCAGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGAAAATTGGTGTGATGTT +CGGTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCT +CGACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCG +CGTGAAAGTGGTGAAGAACAAAATCGCTGCACCGTTTAAACAGGCTGAATTTCAGATCCT +CTACGGCGAAGGTATCAACTTCTACGGCGA +>recA-6 +CGCACGTAAACTGGGCGTCGATATCGATAACCTGCTGTGCTCCCAGCCGGACACCGGCGA +GCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGCGCAGTAGACGTTATCGTCGT +TGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCA +CATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAA +GCAGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGAAAATTGGTGTGATGTT +CGGTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCT +CGACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCG +CGTGAAAGTGGTGAAGAACAAAATCGCTGCGCCGTTTAAACAGGCTGAATTCCAGATCCT +CTACGGCGAAGGTATCAACTTCTACGGCGA +>recA-7 +CGCACGTAAACTGGGCGTCGATATCGATAACCTGCTGTGCTCCCAGCCGGACACCGGCGA +GCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGCGCAGTAGACGTTATCGTCGT +TGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCA +CATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAA +GCAGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGAAAATTGGTGTGATGTT +CGGTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCT +CGACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCG +CGTGAAAGTGGTGAAGAACAAAATCGCTGCACCGTTTAAACAGGCTGAATTTCAGATCCT +CTACGGCGAAGGTATCAACTTCTACGGCGA +>recA-8 +CGCACGTAAACTGGGCGTCGATATCGATAACCTGCTGTGCTCCCAGCCGGACACCGGCGA +GCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGCGCAGTAGACGTTATCGTCGT +TGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCA +CATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAA +GCAGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGAAAATTGGTGTGATGTT +TGGTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCT +CGACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCG +CGTGAAAGTGGTGAAGAACAAAATCGCTGCGCCGTTTAAACAGGCTGAATTCCAGATCCT +CTACGGCGAAGGTATCAACTTCTACGGCGA +>recA-9 +CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGA +GCAGGCACTGGAAATCTGTGATGCCCTGGCACGTTCTGGCGCAGTAGACGTTATCGTCGT +TGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCA +CATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAA +GCAGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGAAAATTGGTGTGATGTT +CGGTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCT +CGACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCG +CGTGAAAGTGGTGAAGAACAAAATCGCTGCGCCGTTTAAACAGGCTGAATTCCAGATCCT +CTACGGCGAAGGTATCAACTTCTACGGCGA \ No newline at end of file diff --git a/t/data/contigs_one_unknown.tfa b/t/data/contigs_one_unknown.tfa new file mode 100644 index 0000000..0fe92e6 --- /dev/null +++ b/t/data/contigs_one_unknown.tfa @@ -0,0 +1,2 @@ +>SomeSequenceName +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGGGGAAAGGGACTCAGGCTCAGTTCATCATGGAGAAATATGGTATTCCGCAAATCTCCACTGGCGATATGCTGCGTGCTGCGGTCAAATCTGGCTCCGAGCTGGGTAAACAAGCAAAAGACATTATGGATGCTGGCAAACTGGTTACCGACGAACTGGTGATCGCGCTGGTTAAAGGGCGCATTGCTCAGGAAGACTGCCGTAATGGTTTCCTGTTGGACGGCTTCCCGCGTACCATTCCGCAGGGAGACGCGATGAAAGAAGCGGGCATCAATGTTGATTACGTTCTGGAATTCGACGTACCGGACGAACTGATCGTTGACCGTATCGTCGGTCGCCGCGTTCACGCGCCGTCTGGTCGTGTTTATCACGTTAAATTCAATCCGCCGAAAGTAGAAGGTAAAGACGACGTTACCGGTGAAGAACTGACTACCCGTAAAGACGATCAGGAAGAAACCGTACGTAAACGTCTGGTTGAATACCATCAGATGACAGCACCGCTGATCGGCTACTACTCCAAAGAAGCTGAAGCGGGTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACAAAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTAACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTGCCGACATCCTGACTTCTATGGTGGTTGACGTTTCTGACCTGCTCGACCAGGCGCGTCAGCGTGGCGATTTCGTCATGTTCGAAGGTGCGCAGGGTACGCTGCTGGATATCGACCACGGTACTTATCCGTACGTAACTTCTTCCAACACCACTGCTGGTGGCGTGGCGACCGGTTCCGGCCTGGGCCCGCGTTATGTTGATTACGTTCTGGGTATCCTCAAAGCTTACTCAACTCGTGTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGCAGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGGAAATTGGTGTGATGTTCGGTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCTCGACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCGCGTGAAAGTGGTGAAGAACAAAATCGCTGCACCGTTTAAACAGGCTGAATTTCAGATCCTCTACGGCGAAGGTATCAACTTCTACGGCGA \ No newline at end of file diff --git a/t/data/contigs_pipe_character_in_seq_name.fa b/t/data/contigs_pipe_character_in_seq_name.fa new file mode 100644 index 0000000..18bad42 --- /dev/null +++ b/t/data/contigs_pipe_character_in_seq_name.fa @@ -0,0 +1,6 @@ +>SomeSequenceName +AAAAAAAAAAAAAAAAAAAAAAAAA +>SeqName|with a pipe +CCCCCCCCCCCCCCCCCCCCCCCCC +>Another_Seq_name +GGGGGGGGGGGGGGGGGGGGGGGGG \ No newline at end of file diff --git a/t/data/expected_concatenated_alleles.fa b/t/data/expected_concatenated_alleles.fa new file mode 100644 index 0000000..5a7cf2f --- /dev/null +++ b/t/data/expected_concatenated_alleles.fa @@ -0,0 +1,27 @@ +>contigs +ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCG +GGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACA +AAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTA +ACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTG +CCGACATCCTGACTTCTATGGTGGTTGACGTTTCTGACCTGCTCGACCAGGCGCGTCAGC +GTGGCGATTTCGTCATGTTCGAAGGTGCGCAGGGTACGCTGCTGGATATCGACCACGGTA +CTTATCCGTACGTAACTTCTTCCAACACCACTGCTGGTGGCGTGGCGACCGGTTCCGGCC +TGGGCCCGCGTTATGTTGATTACGTTCTGGGTATCCTCAAAGCTTACTCAACTCGTGTCG +CACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGC +AGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTG +ACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACA +TGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGC +AGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGAAAATTGGTGTGATGTTCG +GTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCTCG +ACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCGCG +TGAAAGTGGTGAAGAACAAAATCGCTGCACCGTTTAAACAGGCTGAATTTCAGATCCTCT +ACGGCGAAGGTATCAACTTCTACGGCGAGGGGAAAGGGACTCAGGCTCAGTTCATCATGG +AGAAATATGGTATTCCGCAAATCTCCACTGGCGATATGCTGCGTGCTGCGGTCAAATCTG +GCTCCGAGCTGGGTAAACAAGCAAAAGACATTATGGATGCTGGCAAACTGGTTACCGACG +AACTGGTGATCGCGCTGGTTAAAGGGCGCATTGCTCAGGAAGACTGCCGTAATGGTTTCC +TGTTGGACGGCTTCCCGCGTACCATTCCGCAGGCAGACGCGATGAAAGAAGCGGGCATCA +ATGTTGATTACGTTCTGGAATTCGACGTACCGGACGAACTGATCGTTGACCGTATCGTCG +GTCGCCGCGTTCACGCGCCGTCTGGTCGTGTTTATCACGTTAAATTCAATCCGCCGAAAG +TAGAAGGTAAAGACGACGTTACCGGTGAAGAACTGACTACCCGTAAAGACGATCAGGAAG +AAACCGTACGTAAACGTCTGGTTGAATACCATCAGATGACAGCACCGCTGATCGGCTACT +ACTCCAAAGAAGCTGAAGCGGGTA diff --git a/t/data/expected_mlst_results.allele.csv b/t/data/expected_mlst_results.allele.csv new file mode 100644 index 0000000..83a42d8 --- /dev/null +++ b/t/data/expected_mlst_results.allele.csv @@ -0,0 +1,2 @@ +Isolate ST "New ST" Contamination adk purA recA +contigs 4 2 3 1 diff --git a/t/data/expected_mlst_results.genomic.csv b/t/data/expected_mlst_results.genomic.csv new file mode 100644 index 0000000..fcf4876 --- /dev/null +++ b/t/data/expected_mlst_results.genomic.csv @@ -0,0 +1,2 @@ +Isolate ST "New ST" Contamination adk purA recA +contigs 4 GGGGAAAGGGACTCAGGCTCAGTTCATCATGGAGAAATATGGTATTCCGCAAATCTCCACTGGCGATATGCTGCGTGCTGCGGTCAAATCTGGCTCCGAGCTGGGTAAACAAGCAAAAGACATTATGGATGCTGGCAAACTGGTTACCGACGAACTGGTGATCGCGCTGGTTAAAGGGCGCATTGCTCAGGAAGACTGCCGTAATGGTTTCCTGTTGGACGGCTTCCCGCGTACCATTCCGCAGGCAGACGCGATGAAAGAAGCGGGCATCAATGTTGATTACGTTCTGGAATTCGACGTACCGGACGAACTGATCGTTGACCGTATCGTCGGTCGCCGCGTTCACGCGCCGTCTGGTCGTGTTTATCACGTTAAATTCAATCCGCCGAAAGTAGAAGGTAAAGACGACGTTACCGGTGAAGAACTGACTACCCGTAAAGACGATCAGGAAGAAACCGTACGTAAACGTCTGGTTGAATACCATCAGATGACAGCACCGCTGATCGGCTACTACTCCAAAGAAGCTGAAGCGGGTA ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACAAAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTAACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTGCCGACATCCTGACTTCTATGGTGGTTGACGTTTCTGACCTGCTCGACCAGGCGCGTCAGCGTGGCGATTTCGTCATGTTCGAAGGTGCGCAGGGTACGCTGCTGGATATCGACCACGGTACTTATCCGTACGTAACTTCTTCCAACACCACTGCTGGTGGCGTGGCGACCGGTTCCGGCCTGGGCCCGCGTTATGTTGATTACGTTCTGGGTATCCTCAAAGCTTACTCAACTCGTGT CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGCAGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGAAAATTGGTGTGATGTTCGGTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCTCGACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCGCGTGAAAGTGGTGAAGAACAAAATCGCTGCACCGTTTAAACAGGCTGAATTTCAGATCCTCTACGGCGAAGGTATCAACTTCTACGGCGA diff --git a/t/data/expected_three_concatenated_alleles.fa b/t/data/expected_three_concatenated_alleles.fa new file mode 100644 index 0000000..32f933c --- /dev/null +++ b/t/data/expected_three_concatenated_alleles.fa @@ -0,0 +1,54 @@ +>contigs +ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCG +GGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACA +AAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTA +ACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTG +CCGACATCCTGACTTCTATGGTGGTTGACGTTTCTGACCTGCTCGACCAGGCGCGTCAGC +GTGGCGATTTCGTCATGTTCGAAGGTGCGCAGGGTACGCTGCTGGATATCGACCACGGTA +CTTATCCGTACGTAACTTCTTCCAACACCACTGCTGGTGGCGTGGCGACCGGTTCCGGCC +TGGGCCCGCGTTATGTTGATTACGTTCTGGGTATCCTCAAAGCTTACTCAACTCGTGTCG +CACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGC +AGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTG +ACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACA +TGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGC +AGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGAAAATTGGTGTGATGTTCG +GTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCTCG +ACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCGCG +TGAAAGTGGTGAAGAACAAAATCGCTGCACCGTTTAAACAGGCTGAATTTCAGATCCTCT +ACGGCGAAGGTATCAACTTCTACGGCGAGGGGAAAGGGACTCAGGCTCAGTTCATCATGG +AGAAATATGGTATTCCGCAAATCTCCACTGGCGATATGCTGCGTGCTGCGGTCAAATCTG +GCTCCGAGCTGGGTAAACAAGCAAAAGACATTATGGATGCTGGCAAACTGGTTACCGACG +AACTGGTGATCGCGCTGGTTAAAGGGCGCATTGCTCAGGAAGACTGCCGTAATGGTTTCC +TGTTGGACGGCTTCCCGCGTACCATTCCGCAGGCAGACGCGATGAAAGAAGCGGGCATCA +ATGTTGATTACGTTCTGGAATTCGACGTACCGGACGAACTGATCGTTGACCGTATCGTCG +GTCGCCGCGTTCACGCGCCGTCTGGTCGTGTTTATCACGTTAAATTCAATCCGCCGAAAG +TAGAAGGTAAAGACGACGTTACCGGTGAAGAACTGACTACCCGTAAAGACGATCAGGAAG +AAACCGTACGTAAACGTCTGGTTGAATACCATCAGATGACAGCACCGCTGATCGGCTACT +ACTCCAAAGAAGCTGAAGCGGGTA +>contigs_one_unknown +ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCG +GGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACA +AAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTA +ACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTG +CCGACATCCTGACTTCTATGGTGGTTGACGTTTCTGACCTGCTCGACCAGGCGCGTCAGC +GTGGCGATTTCGTCATGTTCGAAGGTGCGCAGGGTACGCTGCTGGATATCGACCACGGTA +CTTATCCGTACGTAACTTCTTCCAACACCACTGCTGGTGGCGTGGCGACCGGTTCCGGCC +TGGGCCCGCGTTATGTTGATTACGTTCTGGGTATCCTCAAAGCTTACTCAACTCGTGTCG +CACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGC +AGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTG +ACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACA +TGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGC +AGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGGAAATTGGTGTGATGTTCG +GTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCTCG +ACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCGCG +TGAAAGTGGTGAAGAACAAAATCGCTGCACCGTTTAAACAGGCTGAATTTCAGATCCTCT +ACGGCGAAGGTATCAACTTCTACGGCGAGGGGAAAGGGACTCAGGCTCAGTTCATCATGG +AGAAATATGGTATTCCGCAAATCTCCACTGGCGATATGCTGCGTGCTGCGGTCAAATCTG +GCTCCGAGCTGGGTAAACAAGCAAAAGACATTATGGATGCTGGCAAACTGGTTACCGACG +AACTGGTGATCGCGCTGGTTAAAGGGCGCATTGCTCAGGAAGACTGCCGTAATGGTTTCC +TGTTGGACGGCTTCCCGCGTACCATTCCGCAGGGAGACGCGATGAAAGAAGCGGGCATCA +ATGTTGATTACGTTCTGGAATTCGACGTACCGGACGAACTGATCGTTGACCGTATCGTCG +GTCGCCGCGTTCACGCGCCGTCTGGTCGTGTTTATCACGTTAAATTCAATCCGCCGAAAG +TAGAAGGTAAAGACGACGTTACCGGTGAAGAACTGACTACCCGTAAAGACGATCAGGAAG +AAACCGTACGTAAACGTCTGGTTGAATACCATCAGATGACAGCACCGCTGATCGGCTACT +ACTCCAAAGAAGCTGAAGCGGGTA diff --git a/t/data/expected_three_contigs_one_unknown.unknown_allele.adk-2.fa b/t/data/expected_three_contigs_one_unknown.unknown_allele.adk-2.fa new file mode 100644 index 0000000..53013b2 --- /dev/null +++ b/t/data/expected_three_contigs_one_unknown.unknown_allele.adk-2.fa @@ -0,0 +1,10 @@ +>adk-2 +GGGGAAAGGGACTCAGGCTCAGTTCATCATGGAGAAATATGGTATTCCGCAAATCTCCAC +TGGCGATATGCTGCGTGCTGCGGTCAAATCTGGCTCCGAGCTGGGTAAACAAGCAAAAGA +CATTATGGATGCTGGCAAACTGGTTACCGACGAACTGGTGATCGCGCTGGTTAAAGGGCG +CATTGCTCAGGAAGACTGCCGTAATGGTTTCCTGTTGGACGGCTTCCCGCGTACCATTCC +GCAGGGAGACGCGATGAAAGAAGCGGGCATCAATGTTGATTACGTTCTGGAATTCGACGT +ACCGGACGAACTGATCGTTGACCGTATCGTCGGTCGCCGCGTTCACGCGCCGTCTGGTCG +TGTTTATCACGTTAAATTCAATCCGCCGAAAGTAGAAGGTAAAGACGACGTTACCGGTGA +AGAACTGACTACCCGTAAAGACGATCAGGAAGAAACCGTACGTAAACGTCTGGTTGAATA +CCATCAGATGACAGCACCGCTGATCGGCTACTACTCCAAAGAAGCTGAAGCGGGTA diff --git a/t/data/expected_three_contigs_one_unknown.unknown_allele.recA-1.fa b/t/data/expected_three_contigs_one_unknown.unknown_allele.recA-1.fa new file mode 100644 index 0000000..2b34061 --- /dev/null +++ b/t/data/expected_three_contigs_one_unknown.unknown_allele.recA-1.fa @@ -0,0 +1,10 @@ +>recA-1 +CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGA +GCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGT +TGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCA +CATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAA +GCAGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGGAAATTGGTGTGATGTT +CGGTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCT +CGACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCG +CGTGAAAGTGGTGAAGAACAAAATCGCTGCACCGTTTAAACAGGCTGAATTTCAGATCCT +CTACGGCGAAGGTATCAACTTCTACGGCGA diff --git a/t/data/expected_three_mlst_results.allele.csv b/t/data/expected_three_mlst_results.allele.csv new file mode 100644 index 0000000..2161c82 --- /dev/null +++ b/t/data/expected_three_mlst_results.allele.csv @@ -0,0 +1,4 @@ +Isolate ST "New ST" Contamination adk-2 purA recA-1 +contigs 4 2 3 1 +contigs_pipe_character_in_seq_name "New ST" U U U +contigs_one_unknown 1 "New ST" U 3 U diff --git a/t/data/expected_three_mlst_results.genomic.csv b/t/data/expected_three_mlst_results.genomic.csv new file mode 100644 index 0000000..f165dcf --- /dev/null +++ b/t/data/expected_three_mlst_results.genomic.csv @@ -0,0 +1,4 @@ +Isolate ST "New ST" Contamination adk-2 purA recA-1 +contigs 4 GGGGAAAGGGACTCAGGCTCAGTTCATCATGGAGAAATATGGTATTCCGCAAATCTCCACTGGCGATATGCTGCGTGCTGCGGTCAAATCTGGCTCCGAGCTGGGTAAACAAGCAAAAGACATTATGGATGCTGGCAAACTGGTTACCGACGAACTGGTGATCGCGCTGGTTAAAGGGCGCATTGCTCAGGAAGACTGCCGTAATGGTTTCCTGTTGGACGGCTTCCCGCGTACCATTCCGCAGGCAGACGCGATGAAAGAAGCGGGCATCAATGTTGATTACGTTCTGGAATTCGACGTACCGGACGAACTGATCGTTGACCGTATCGTCGGTCGCCGCGTTCACGCGCCGTCTGGTCGTGTTTATCACGTTAAATTCAATCCGCCGAAAGTAGAAGGTAAAGACGACGTTACCGGTGAAGAACTGACTACCCGTAAAGACGATCAGGAAGAAACCGTACGTAAACGTCTGGTTGAATACCATCAGATGACAGCACCGCTGATCGGCTACTACTCCAAAGAAGCTGAAGCGGGTA ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACAAAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTAACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTGCCGACATCCTGACTTCTATGGTGGTTGACGTTTCTGACCTGCTCGACCAGGCGCGTCAGCGTGGCGATTTCGTCATGTTCGAAGGTGCGCAGGGTACGCTGCTGGATATCGACCACGGTACTTATCCGTACGTAACTTCTTCCAACACCACTGCTGGTGGCGTGGCGACCGGTTCCGGCCTGGGCCCGCGTTATGTTGATTACGTTCTGGGTATCCTCAAAGCTTACTCAACTCGTGT CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGCAGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGAAAATTGGTGTGATGTTCGGTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCTCGACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCGCGTGAAAGTGGTGAAGAACAAAATCGCTGCACCGTTTAAACAGGCTGAATTTCAGATCCTCTACGGCGAAGGTATCAACTTCTACGGCGA +contigs_pipe_character_in_seq_name "New ST" U U U +contigs_one_unknown 1 "New ST" U ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACAAAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTAACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTGCCGACATCCTGACTTCTATGGTGGTTGACGTTTCTGACCTGCTCGACCAGGCGCGTCAGCGTGGCGATTTCGTCATGTTCGAAGGTGCGCAGGGTACGCTGCTGGATATCGACCACGGTACTTATCCGTACGTAACTTCTTCCAACACCACTGCTGGTGGCGTGGCGACCGGTTCCGGCCTGGGCCCGCGTTATGTTGATTACGTTCTGGGTATCCTCAAAGCTTACTCAACTCGTGT U diff --git a/t/data/expected_two_concatenated_alleles.fa b/t/data/expected_two_concatenated_alleles.fa new file mode 100644 index 0000000..5a7cf2f --- /dev/null +++ b/t/data/expected_two_concatenated_alleles.fa @@ -0,0 +1,27 @@ +>contigs +ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCG +GGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACA +AAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTA +ACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTG +CCGACATCCTGACTTCTATGGTGGTTGACGTTTCTGACCTGCTCGACCAGGCGCGTCAGC +GTGGCGATTTCGTCATGTTCGAAGGTGCGCAGGGTACGCTGCTGGATATCGACCACGGTA +CTTATCCGTACGTAACTTCTTCCAACACCACTGCTGGTGGCGTGGCGACCGGTTCCGGCC +TGGGCCCGCGTTATGTTGATTACGTTCTGGGTATCCTCAAAGCTTACTCAACTCGTGTCG +CACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGC +AGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTG +ACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACA +TGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGC +AGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGAAAATTGGTGTGATGTTCG +GTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCTCG +ACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCGCG +TGAAAGTGGTGAAGAACAAAATCGCTGCACCGTTTAAACAGGCTGAATTTCAGATCCTCT +ACGGCGAAGGTATCAACTTCTACGGCGAGGGGAAAGGGACTCAGGCTCAGTTCATCATGG +AGAAATATGGTATTCCGCAAATCTCCACTGGCGATATGCTGCGTGCTGCGGTCAAATCTG +GCTCCGAGCTGGGTAAACAAGCAAAAGACATTATGGATGCTGGCAAACTGGTTACCGACG +AACTGGTGATCGCGCTGGTTAAAGGGCGCATTGCTCAGGAAGACTGCCGTAATGGTTTCC +TGTTGGACGGCTTCCCGCGTACCATTCCGCAGGCAGACGCGATGAAAGAAGCGGGCATCA +ATGTTGATTACGTTCTGGAATTCGACGTACCGGACGAACTGATCGTTGACCGTATCGTCG +GTCGCCGCGTTCACGCGCCGTCTGGTCGTGTTTATCACGTTAAATTCAATCCGCCGAAAG +TAGAAGGTAAAGACGACGTTACCGGTGAAGAACTGACTACCCGTAAAGACGATCAGGAAG +AAACCGTACGTAAACGTCTGGTTGAATACCATCAGATGACAGCACCGCTGATCGGCTACT +ACTCCAAAGAAGCTGAAGCGGGTA diff --git a/t/data/expected_two_mlst_results.allele.csv b/t/data/expected_two_mlst_results.allele.csv new file mode 100644 index 0000000..d90fe7e --- /dev/null +++ b/t/data/expected_two_mlst_results.allele.csv @@ -0,0 +1,3 @@ +Isolate ST "New ST" Contamination adk purA recA +contigs 4 2 3 1 +contigs_pipe_character_in_seq_name "New ST" U U U diff --git a/t/data/expected_two_mlst_results.genomic.csv b/t/data/expected_two_mlst_results.genomic.csv new file mode 100644 index 0000000..3678e96 --- /dev/null +++ b/t/data/expected_two_mlst_results.genomic.csv @@ -0,0 +1,3 @@ +Isolate ST "New ST" Contamination adk purA recA +contigs 4 GGGGAAAGGGACTCAGGCTCAGTTCATCATGGAGAAATATGGTATTCCGCAAATCTCCACTGGCGATATGCTGCGTGCTGCGGTCAAATCTGGCTCCGAGCTGGGTAAACAAGCAAAAGACATTATGGATGCTGGCAAACTGGTTACCGACGAACTGGTGATCGCGCTGGTTAAAGGGCGCATTGCTCAGGAAGACTGCCGTAATGGTTTCCTGTTGGACGGCTTCCCGCGTACCATTCCGCAGGCAGACGCGATGAAAGAAGCGGGCATCAATGTTGATTACGTTCTGGAATTCGACGTACCGGACGAACTGATCGTTGACCGTATCGTCGGTCGCCGCGTTCACGCGCCGTCTGGTCGTGTTTATCACGTTAAATTCAATCCGCCGAAAGTAGAAGGTAAAGACGACGTTACCGGTGAAGAACTGACTACCCGTAAAGACGATCAGGAAGAAACCGTACGTAAACGTCTGGTTGAATACCATCAGATGACAGCACCGCTGATCGGCTACTACTCCAAAGAAGCTGAAGCGGGTA ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACAAAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTAACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTGCCGACATCCTGACTTCTATGGTGGTTGACGTTTCTGACCTGCTCGACCAGGCGCGTCAGCGTGGCGATTTCGTCATGTTCGAAGGTGCGCAGGGTACGCTGCTGGATATCGACCACGGTACTTATCCGTACGTAACTTCTTCCAACACCACTGCTGGTGGCGTGGCGACCGGTTCCGGCCTGGGCCCGCGTTATGTTGATTACGTTCTGGGTATCCTCAAAGCTTACTCAACTCGTGT CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGCAGTCCAACACGCTGCTGATCTTCATCAACCAGATCCGTATGAAAATTGGTGTGATGTTCGGTAACCCGGAAACCACTACCGGTGGTAACGCGCTGAAATTCTACGCCTCTGTTCGTCTCGACATCCGTCGTATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTGGGTAGCGAAACCCGCGTGAAAGTGGTGAAGAACAAAATCGCTGCACCGTTTAAACAGGCTGAATTTCAGATCCTCTACGGCGAAGGTATCAACTTCTACGGCGA +contigs_pipe_character_in_seq_name "New ST" U U U