From 5fb95991b9aab01a6cfcbf3efd990186f64d5cdb Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 26 Jan 2015 10:04:05 +0000 Subject: [PATCH 1/4] further debian changelog syntax fix --- debian/changelog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index 04badfe7..7507d32e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -2,7 +2,7 @@ gubbins (1.1.1~trusty1) trusty; urgency=low * Message to tell user theres a problem with outgroups not a clade - -- Andrew Page Fri, 23 Jan 2015 16:02 gubbins (1.1.0~trusty1) trusty; urgency=low + -- Andrew Page Fri, 23 Jan 2015 16:02:00 +0000 gubbins (1.1.0~trusty1) trusty; urgency=low From c3daaaa3266529d796e0cd8841971d83aa6b900f Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 26 Jan 2015 10:21:52 +0000 Subject: [PATCH 2/4] make sure ntp is available --- release/manifests/trustyvm.pp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release/manifests/trustyvm.pp b/release/manifests/trustyvm.pp index e3bff626..0c9b9a02 100644 --- a/release/manifests/trustyvm.pp +++ b/release/manifests/trustyvm.pp @@ -2,7 +2,7 @@ ensure => "installed" } -package { ["gcc", "build-essential", "pkg-config"]: +package { ["gcc", "build-essential", "pkg-config","ntp"]: ensure => "installed" } From c2fab08780591601e233cba0efa749d94a2ff4cf Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Fri, 6 Mar 2015 09:46:24 +0000 Subject: [PATCH 3/4] fix gff format and better error messages --- python/gubbins/common.py | 53 +++++++++++++------ ...lt.multiple_recombinations.iteration_5.gff | 8 +-- src/gff_file.c | 4 +- tests/data/one_recombination.tre.expected.gff | 2 +- 4 files changed, 43 insertions(+), 24 deletions(-) diff --git a/python/gubbins/common.py b/python/gubbins/common.py index 34ce2379..b316ac64 100644 --- a/python/gubbins/common.py +++ b/python/gubbins/common.py @@ -17,24 +17,23 @@ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # -import sys -import argparse -import subprocess -import os -import time -import re -import tempfile -from collections import Counter +from Bio import AlignIO from Bio import Phylo -import dendropy from Bio import SeqIO -from Bio import AlignIO from Bio.Align import MultipleSeqAlignment from Bio.Seq import Seq from cStringIO import StringIO +from collections import Counter +import argparse +import dendropy +import math +import os +import re import shutil import subprocess -import math +import sys +import tempfile +import time class GubbinsError(Exception): def __init__(self, value,message): @@ -205,7 +204,11 @@ def parse_and_run(self): # find all snp sites if self.args.verbose > 0: print GUBBINS_EXEC +" "+ self.args.alignment_filename - subprocess.check_call([GUBBINS_EXEC, self.args.alignment_filename]) + try: + subprocess.check_call([GUBBINS_EXEC, self.args.alignment_filename]) + except: + sys.exit("Gubbins crashed, please ensure you have enough free memory") + if self.args.verbose > 0: print int(time.time()) @@ -282,7 +285,10 @@ def parse_and_run(self): if self.args.starting_tree is not None and i == 1: shutil.copyfile(self.args.starting_tree, current_tree_name) else: - subprocess.check_call(tree_building_command, shell=True) + try: + subprocess.check_call(tree_building_command, shell=True) + except: + sys.exit("Failed while building the tree.") if self.args.verbose > 0: print int(time.time()) @@ -295,7 +301,11 @@ def parse_and_run(self): fastml_command_suffix = '' - subprocess.check_call(fastml_command+fastml_command_suffix, shell=True) + try: + subprocess.check_call(fastml_command+fastml_command_suffix, shell=True) + except: + sys.exit("Failed while running FastML") + shutil.copyfile(current_tree_name+'.output_tree',current_tree_name) shutil.copyfile(starting_base_filename+".start", starting_base_filename+".gaps.snp_sites.aln") GubbinsCommon.reinsert_gaps_into_fasta_file(current_tree_name+'.seq.joint.txt', starting_base_filename +".gaps.vcf", starting_base_filename+".gaps.snp_sites.aln") @@ -309,7 +319,10 @@ def parse_and_run(self): if self.args.verbose > 0: print gubbins_command - subprocess.check_call(gubbins_command, shell=True) + try: + subprocess.check_call(gubbins_command, shell=True) + except: + sys.exit("Failed while running Gubbins. Please ensure you have enough free memory") if self.args.verbose > 0: print int(time.time()) @@ -941,10 +954,16 @@ def pairwise_comparison(filename,base_filename,gubbins_exec,alignment_filename,f sequence_names = GubbinsCommon.get_sequence_names_from_alignment(filename) GubbinsCommon.create_pairwise_newick_tree(sequence_names, base_filename+".tre") - subprocess.check_call(GubbinsCommon.generate_fastml_command(fastml_exec, base_filename+".gaps.snp_sites.aln", base_filename+".tre"), shell=True) + try: + subprocess.check_call(GubbinsCommon.generate_fastml_command(fastml_exec, base_filename+".gaps.snp_sites.aln", base_filename+".tre"), shell=True) + except: + sys.exit("Failed while running fastML") shutil.copyfile(base_filename+'.tre.output_tree',base_filename+".tre") shutil.copyfile(base_filename+'.tre.seq.joint.txt', base_filename+".snp_sites.aln") - subprocess.check_call(gubbins_exec+" -r -v "+base_filename+".vcf -t "+base_filename+".tre -f "+ alignment_filename +" "+ base_filename+".snp_sites.aln", shell=True) + try: + subprocess.check_call(gubbins_exec+" -r -v "+base_filename+".vcf -t "+base_filename+".tre -f "+ alignment_filename +" "+ base_filename+".snp_sites.aln", shell=True) + except: + sys.exit("Failed while running Gubbins") GubbinsCommon.rename_files(GubbinsCommon.translation_of_filenames_to_final_filenames_pairwise(base_filename, base_filename_without_ext)) @staticmethod diff --git a/python/gubbins/tests/data/expected_RAxML_result.multiple_recombinations.iteration_5.gff b/python/gubbins/tests/data/expected_RAxML_result.multiple_recombinations.iteration_5.gff index dedc8f10..49b3ad38 100644 --- a/python/gubbins/tests/data/expected_RAxML_result.multiple_recombinations.iteration_5.gff +++ b/python/gubbins/tests/data/expected_RAxML_result.multiple_recombinations.iteration_5.gff @@ -1,6 +1,6 @@ ##gff-version 3 ##sequence-region SEQUENCE 1 242 -SEQUENCE GUBBINS CDS 29 49 0.000 . 0 node="N7->sequence_6";neg_log_likelihood="4.955311"taxa="sequence_6";snp_count="21" -SEQUENCE GUBBINS CDS 29 84 0.000 . 0 node="N5->N6";neg_log_likelihood="12.082148"taxa=" sequence_9 sequence_6 sequence_8 sequence_7";snp_count="51" -SEQUENCE GUBBINS CDS 51 84 0.000 . 0 node="N4->N5";neg_log_likelihood="10.195830"taxa=" sequence_5 sequence_9 sequence_6 sequence_8 sequence_7";snp_count="30" -SEQUENCE GUBBINS CDS 51 84 0.000 . 0 node="N1->N4";neg_log_likelihood="10.195830"taxa=" sequence_1 sequence_5 sequence_9 sequence_6 sequence_8 sequence_7";snp_count="30" +SEQUENCE GUBBINS CDS 29 49 0.000 . 0 node="N7->sequence_6";neg_log_likelihood="4.955311";taxa="sequence_6";snp_count="21"; +SEQUENCE GUBBINS CDS 29 84 0.000 . 0 node="N5->N6";neg_log_likelihood="12.082148";taxa=" sequence_9 sequence_6 sequence_8 sequence_7";snp_count="51"; +SEQUENCE GUBBINS CDS 51 84 0.000 . 0 node="N4->N5";neg_log_likelihood="10.195830";taxa=" sequence_5 sequence_9 sequence_6 sequence_8 sequence_7";snp_count="30"; +SEQUENCE GUBBINS CDS 51 84 0.000 . 0 node="N1->N4";neg_log_likelihood="10.195830";taxa=" sequence_1 sequence_5 sequence_9 sequence_6 sequence_8 sequence_7";snp_count="30"; diff --git a/src/gff_file.c b/src/gff_file.c index e4c74642..a277a1a3 100644 --- a/src/gff_file.c +++ b/src/gff_file.c @@ -37,9 +37,9 @@ void print_gff_line(FILE * gff_file_pointer, int start_coordinate, int end_coord fprintf(gff_file_pointer, "0.000\t.\t0\t"); fprintf(gff_file_pointer, "node=\"%s->%s\";", parent_node_id, current_node_id ); - fprintf(gff_file_pointer, "neg_log_likelihood=\"%f\"", neg_log_likelihood); + fprintf(gff_file_pointer, "neg_log_likelihood=\"%f\";", neg_log_likelihood); fprintf(gff_file_pointer, "taxa=\"%s\";", taxon_names); - fprintf(gff_file_pointer, "snp_count=\"%d\"", number_of_snps); + fprintf(gff_file_pointer, "snp_count=\"%d\";", number_of_snps); fprintf(gff_file_pointer, "\n"); fflush(gff_file_pointer); diff --git a/tests/data/one_recombination.tre.expected.gff b/tests/data/one_recombination.tre.expected.gff index 2c46eb1b..46833b5c 100644 --- a/tests/data/one_recombination.tre.expected.gff +++ b/tests/data/one_recombination.tre.expected.gff @@ -1,3 +1,3 @@ ##gff-version 3 ##sequence-region SEQUENCE 1 135 -SEQUENCE GUBBINS CDS 63 167 0.000 . 0 node="N5->sequence_2";taxa="sequence_2";snp_count="103" +SEQUENCE GUBBINS CDS 63 167 0.000 . 0 node="N5->sequence_2";taxa="sequence_2";snp_count="103"; From 5cbe0f773bafb6e73185dcc695cba2827c82faeb Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Fri, 6 Mar 2015 11:29:30 +0000 Subject: [PATCH 4/4] Fix warnings --- VERSION | 2 +- debian/changelog | 6 ++++++ install-userspace.sh | 2 +- src/alignment_file.c | 1 + src/gubbins.c | 3 ++- src/main.c | 6 +----- src/parse_phylip.h | 2 ++ src/seqUtil.c | 1 + src/seqUtil.h | 1 + src/snp_searching.c | 1 + src/snp_searching.h | 2 ++ src/snp_sites.c | 1 + src/vcf.c | 2 +- 13 files changed, 21 insertions(+), 9 deletions(-) diff --git a/VERSION b/VERSION index 8cfbc905..8428158d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.1.1 \ No newline at end of file +1.1.2 \ No newline at end of file diff --git a/debian/changelog b/debian/changelog index 7507d32e..5f1cf9fe 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +gubbins (1.1.2~trusty1) trusty; urgency=low + + * Fix GFF formatting and C warnings + + -- Andrew Page Fri, 23 Jan 2015 16:02:00 +0000 + gubbins (1.1.1~trusty1) trusty; urgency=low * Message to tell user theres a problem with outgroups not a clade diff --git a/install-userspace.sh b/install-userspace.sh index 319ff6d1..9aa2205a 100755 --- a/install-userspace.sh +++ b/install-userspace.sh @@ -13,7 +13,7 @@ # py_pkgs=( "biopython" "dendropy" ) -deb_urls=( "http://uk.archive.ubuntu.com/ubuntu/pool/universe/r/raxml/raxml_7.2.8-2_amd64.deb" "https://launchpad.net/~ap13/+archive/ubuntu/gubbins/+files/fastml2_2.2~trusty1_amd64.deb" "https://launchpad.net/~ap13/+archive/ubuntu/gubbins/+files/gubbins_0.1.7~trusty2_amd64.deb" ) +deb_urls=( "http://uk.archive.ubuntu.com/ubuntu/pool/universe/r/raxml/raxml_7.2.8-2_amd64.deb" "https://launchpad.net/~ap13/+archive/ubuntu/gubbins/+files/fastml2_2.3~trusty1_amd64.deb" "https://launchpad.net/~ap13/+archive/ubuntu/gubbins/+files/gubbins_1.1.1~trusty1_amd64.deb" ) function check_platform { # Ubuntu 14.04 diff --git a/src/alignment_file.c b/src/alignment_file.c index 08f5f02a..20aba56c 100644 --- a/src/alignment_file.c +++ b/src/alignment_file.c @@ -28,6 +28,7 @@ #include "vcf.h" #include "alignment_file.h" #include "snp_sites.h" +#include "string_cat.h" KSEQ_INIT(gzFile, gzread) diff --git a/src/gubbins.c b/src/gubbins.c index 15a3d4ab..dbd35671 100644 --- a/src/gubbins.c +++ b/src/gubbins.c @@ -31,7 +31,8 @@ #include "tree_scaling.h" #include "seqUtil.h" #include "Newickform.h" - +#include "tree_statistics.h" +#include "fasta_of_snp_sites.h" // get reference sequence from VCF, and store snp locations diff --git a/src/main.c b/src/main.c index c37c4e61..7e2a191d 100644 --- a/src/main.c +++ b/src/main.c @@ -49,11 +49,6 @@ void print_usage(FILE* stream, int exit_code) " -m Min SNPs for identifying a recombination block\n" " -h Display this usage information.\n\n" ); - - fprintf (stream, "Step 1: Detect SNP sites (generates inputs files for step 2)\n"); - fprintf (stream, "gubbins alignment_file\n\n", program_name); - fprintf (stream, "Step 2: Detect recombinations\n"); - fprintf (stream, "gubbins -r -v vcf_file -t newick_tree -f original.aln -m 10 alignment_file\n\n", program_name); exit (exit_code); } @@ -64,6 +59,7 @@ int check_file_exists_or_exit(char * filename) } else { printf("Error: File '%s' doesnt exist\n",filename); print_usage(stderr, EXIT_FAILURE); + return 0; } } diff --git a/src/parse_phylip.h b/src/parse_phylip.h index 8481280f..ba990417 100644 --- a/src/parse_phylip.h +++ b/src/parse_phylip.h @@ -50,6 +50,8 @@ int get_internal_node(int sequence_index); void fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap(int parent_sequence_index, int * child_sequence_indices, int num_children); void fill_in_unambiguous_gaps_in_parent_from_children(int parent_sequence_index, int * child_sequence_indices, int num_children); void freeup_memory(); +void set_number_of_bases_in_recombinations(char * sample_name, int bases_in_recombinations); +void filter_sequence_bases_and_rotate(char * reference_bases, char ** filtered_bases_for_snps, int number_of_filtered_snps); #define MAX_READ_BUFFER 65536 #define MAX_SAMPLE_NAME_SIZE 1024 diff --git a/src/seqUtil.c b/src/seqUtil.c index eda3f9e3..1f9f67fe 100644 --- a/src/seqUtil.c +++ b/src/seqUtil.c @@ -1,6 +1,7 @@ #define __SEQUTIL_C__ #include "seqUtil.h" +#include "string_cat.h" /* * Yu-Wei Wu http://yuweibioinfo.blogspot.com/2008/10/newick-tree-parser-in-c-make-use-of.html diff --git a/src/seqUtil.h b/src/seqUtil.h index f4cace58..74a3c56f 100644 --- a/src/seqUtil.h +++ b/src/seqUtil.h @@ -4,6 +4,7 @@ #include #include #include +#include "string_cat.h" /* * Yu-Wei Wu http://yuweibioinfo.blogspot.com/2008/10/newick-tree-parser-in-c-make-use-of.html diff --git a/src/snp_searching.c b/src/snp_searching.c index 67ad72e4..d018f9b0 100644 --- a/src/snp_searching.c +++ b/src/snp_searching.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "snp_searching.h" // Most of the methods in this file look the same, so should be DRYed out. diff --git a/src/snp_searching.h b/src/snp_searching.h index 3a4c07ed..c14fb276 100644 --- a/src/snp_searching.h +++ b/src/snp_searching.h @@ -33,5 +33,7 @@ int advance_window_start_to_next_snp_with_start_index(int window_start_coordinat int rewind_window_end_to_last_snp_with_start_end_index(int window_end_coordinate, int * snp_locations, char * child_sequence, int number_of_branch_snps, int start_index,int end_index); int find_number_of_snps_in_block_with_start_end_index(int window_start_coordinate, int window_end_coordinate, int * snp_locations, char * child_sequence, int number_of_snps, int start_index,int end_index); int get_window_end_coordinates_excluding_gaps_with_start_end_index(int window_start_coordinate, int window_size, int * snp_locations, char * child_sequence, int number_of_snps, int start_index,int end_index); +int calculate_block_size_without_gaps_with_start_end_index(char * child_sequence, int * snp_locations, int starting_coordinate, int ending_coordinate, int length_of_original_genome, int start_index,int end_index); + #endif \ No newline at end of file diff --git a/src/snp_sites.c b/src/snp_sites.c index d0e72d5b..453a9611 100644 --- a/src/snp_sites.c +++ b/src/snp_sites.c @@ -28,6 +28,7 @@ #include "phylip_of_snp_sites.h" #include "parse_phylip.h" #include "string_cat.h" +#include "fasta_of_snp_sites.h" void build_snp_locations(int snp_locations[], char reference_sequence[]) diff --git a/src/vcf.c b/src/vcf.c index e50f003d..6ebf34d6 100644 --- a/src/vcf.c +++ b/src/vcf.c @@ -168,7 +168,7 @@ void output_vcf_row_samples_bases(FILE * vcf_file_pointer, char reference_base, { continue; } - if((bases_for_snp[i] == reference_base)) + if(bases_for_snp[i] == reference_base) { fprintf( vcf_file_pointer, "%c", (char) reference_base ); }