Skip to content

Commit

Permalink
Merge pull request #128 from andrewjpage/update_robustness
Browse files Browse the repository at this point in the history
Update robustness
  • Loading branch information
andrewjpage committed Mar 6, 2015
2 parents 49ef9ae + 5cbe0f7 commit 4b7486a
Show file tree
Hide file tree
Showing 18 changed files with 66 additions and 35 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.1.1
1.1.2
8 changes: 7 additions & 1 deletion debian/changelog
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
gubbins (1.1.2~trusty1) trusty; urgency=low

* Fix GFF formatting and C warnings

-- Andrew Page <ap13@sanger.ac.uk> Fri, 23 Jan 2015 16:02:00 +0000

gubbins (1.1.1~trusty1) trusty; urgency=low

* Message to tell the user there's a problem when the outgroups are not a clade

-- Andrew Page <ap13@sanger.ac.uk> Fri, 23 Jan 2015 16:02 gubbins (1.1.0~trusty1) trusty; urgency=low
-- Andrew Page <ap13@sanger.ac.uk> Fri, 23 Jan 2015 16:02:00 +0000

gubbins (1.1.0~trusty1) trusty; urgency=low

Expand Down
2 changes: 1 addition & 1 deletion install-userspace.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#

py_pkgs=( "biopython" "dendropy" )
deb_urls=( "http://uk.archive.ubuntu.com/ubuntu/pool/universe/r/raxml/raxml_7.2.8-2_amd64.deb" "https://launchpad.net/~ap13/+archive/ubuntu/gubbins/+files/fastml2_2.2~trusty1_amd64.deb" "https://launchpad.net/~ap13/+archive/ubuntu/gubbins/+files/gubbins_0.1.7~trusty2_amd64.deb" )
deb_urls=( "http://uk.archive.ubuntu.com/ubuntu/pool/universe/r/raxml/raxml_7.2.8-2_amd64.deb" "https://launchpad.net/~ap13/+archive/ubuntu/gubbins/+files/fastml2_2.3~trusty1_amd64.deb" "https://launchpad.net/~ap13/+archive/ubuntu/gubbins/+files/gubbins_1.1.1~trusty1_amd64.deb" )

function check_platform {
# Ubuntu 14.04
Expand Down
53 changes: 36 additions & 17 deletions python/gubbins/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,23 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#

import sys
import argparse
import subprocess
import os
import time
import re
import tempfile
from collections import Counter
from Bio import AlignIO
from Bio import Phylo
import dendropy
from Bio import SeqIO
from Bio import AlignIO
from Bio.Align import MultipleSeqAlignment
from Bio.Seq import Seq
from cStringIO import StringIO
from collections import Counter
import argparse
import dendropy
import math
import os
import re
import shutil
import subprocess
import math
import sys
import tempfile
import time

class GubbinsError(Exception):
def __init__(self, value,message):
Expand Down Expand Up @@ -205,7 +204,11 @@ def parse_and_run(self):
# find all snp sites
if self.args.verbose > 0:
print GUBBINS_EXEC +" "+ self.args.alignment_filename
subprocess.check_call([GUBBINS_EXEC, self.args.alignment_filename])
try:
subprocess.check_call([GUBBINS_EXEC, self.args.alignment_filename])
except:
sys.exit("Gubbins crashed, please ensure you have enough free memory")

if self.args.verbose > 0:
print int(time.time())

Expand Down Expand Up @@ -282,7 +285,10 @@ def parse_and_run(self):
if self.args.starting_tree is not None and i == 1:
shutil.copyfile(self.args.starting_tree, current_tree_name)
else:
subprocess.check_call(tree_building_command, shell=True)
try:
subprocess.check_call(tree_building_command, shell=True)
except:
sys.exit("Failed while building the tree.")

if self.args.verbose > 0:
print int(time.time())
Expand All @@ -295,7 +301,11 @@ def parse_and_run(self):
fastml_command_suffix = ''


subprocess.check_call(fastml_command+fastml_command_suffix, shell=True)
try:
subprocess.check_call(fastml_command+fastml_command_suffix, shell=True)
except:
sys.exit("Failed while running FastML")

shutil.copyfile(current_tree_name+'.output_tree',current_tree_name)
shutil.copyfile(starting_base_filename+".start", starting_base_filename+".gaps.snp_sites.aln")
GubbinsCommon.reinsert_gaps_into_fasta_file(current_tree_name+'.seq.joint.txt', starting_base_filename +".gaps.vcf", starting_base_filename+".gaps.snp_sites.aln")
Expand All @@ -309,7 +319,10 @@ def parse_and_run(self):

if self.args.verbose > 0:
print gubbins_command
subprocess.check_call(gubbins_command, shell=True)
try:
subprocess.check_call(gubbins_command, shell=True)
except:
sys.exit("Failed while running Gubbins. Please ensure you have enough free memory")
if self.args.verbose > 0:
print int(time.time())

Expand Down Expand Up @@ -941,10 +954,16 @@ def pairwise_comparison(filename,base_filename,gubbins_exec,alignment_filename,f
sequence_names = GubbinsCommon.get_sequence_names_from_alignment(filename)
GubbinsCommon.create_pairwise_newick_tree(sequence_names, base_filename+".tre")

subprocess.check_call(GubbinsCommon.generate_fastml_command(fastml_exec, base_filename+".gaps.snp_sites.aln", base_filename+".tre"), shell=True)
try:
subprocess.check_call(GubbinsCommon.generate_fastml_command(fastml_exec, base_filename+".gaps.snp_sites.aln", base_filename+".tre"), shell=True)
except:
sys.exit("Failed while running fastML")
shutil.copyfile(base_filename+'.tre.output_tree',base_filename+".tre")
shutil.copyfile(base_filename+'.tre.seq.joint.txt', base_filename+".snp_sites.aln")
subprocess.check_call(gubbins_exec+" -r -v "+base_filename+".vcf -t "+base_filename+".tre -f "+ alignment_filename +" "+ base_filename+".snp_sites.aln", shell=True)
try:
subprocess.check_call(gubbins_exec+" -r -v "+base_filename+".vcf -t "+base_filename+".tre -f "+ alignment_filename +" "+ base_filename+".snp_sites.aln", shell=True)
except:
sys.exit("Failed while running Gubbins")
GubbinsCommon.rename_files(GubbinsCommon.translation_of_filenames_to_final_filenames_pairwise(base_filename, base_filename_without_ext))

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
##gff-version 3
##sequence-region SEQUENCE 1 242
SEQUENCE GUBBINS CDS 29 49 0.000 . 0 node="N7->sequence_6";neg_log_likelihood="4.955311"taxa="sequence_6";snp_count="21"
SEQUENCE GUBBINS CDS 29 84 0.000 . 0 node="N5->N6";neg_log_likelihood="12.082148"taxa=" sequence_9 sequence_6 sequence_8 sequence_7";snp_count="51"
SEQUENCE GUBBINS CDS 51 84 0.000 . 0 node="N4->N5";neg_log_likelihood="10.195830"taxa=" sequence_5 sequence_9 sequence_6 sequence_8 sequence_7";snp_count="30"
SEQUENCE GUBBINS CDS 51 84 0.000 . 0 node="N1->N4";neg_log_likelihood="10.195830"taxa=" sequence_1 sequence_5 sequence_9 sequence_6 sequence_8 sequence_7";snp_count="30"
SEQUENCE GUBBINS CDS 29 49 0.000 . 0 node="N7->sequence_6";neg_log_likelihood="4.955311";taxa="sequence_6";snp_count="21";
SEQUENCE GUBBINS CDS 29 84 0.000 . 0 node="N5->N6";neg_log_likelihood="12.082148";taxa=" sequence_9 sequence_6 sequence_8 sequence_7";snp_count="51";
SEQUENCE GUBBINS CDS 51 84 0.000 . 0 node="N4->N5";neg_log_likelihood="10.195830";taxa=" sequence_5 sequence_9 sequence_6 sequence_8 sequence_7";snp_count="30";
SEQUENCE GUBBINS CDS 51 84 0.000 . 0 node="N1->N4";neg_log_likelihood="10.195830";taxa=" sequence_1 sequence_5 sequence_9 sequence_6 sequence_8 sequence_7";snp_count="30";
2 changes: 1 addition & 1 deletion release/manifests/trustyvm.pp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ensure => "installed"
}

package { ["gcc", "build-essential", "pkg-config"]:
package { ["gcc", "build-essential", "pkg-config","ntp"]:
ensure => "installed"
}

Expand Down
1 change: 1 addition & 0 deletions src/alignment_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "vcf.h"
#include "alignment_file.h"
#include "snp_sites.h"
#include "string_cat.h"

KSEQ_INIT(gzFile, gzread)

Expand Down
4 changes: 2 additions & 2 deletions src/gff_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ void print_gff_line(FILE * gff_file_pointer, int start_coordinate, int end_coord
fprintf(gff_file_pointer, "0.000\t.\t0\t");

fprintf(gff_file_pointer, "node=\"%s->%s\";", parent_node_id, current_node_id );
fprintf(gff_file_pointer, "neg_log_likelihood=\"%f\"", neg_log_likelihood);
fprintf(gff_file_pointer, "neg_log_likelihood=\"%f\";", neg_log_likelihood);
fprintf(gff_file_pointer, "taxa=\"%s\";", taxon_names);
fprintf(gff_file_pointer, "snp_count=\"%d\"", number_of_snps);
fprintf(gff_file_pointer, "snp_count=\"%d\";", number_of_snps);
fprintf(gff_file_pointer, "\n");

fflush(gff_file_pointer);
Expand Down
3 changes: 2 additions & 1 deletion src/gubbins.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
#include "tree_scaling.h"
#include "seqUtil.h"
#include "Newickform.h"

#include "tree_statistics.h"
#include "fasta_of_snp_sites.h"


// get reference sequence from VCF, and store snp locations
Expand Down
6 changes: 1 addition & 5 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,6 @@ void print_usage(FILE* stream, int exit_code)
" -m Min SNPs for identifying a recombination block\n"
" -h Display this usage information.\n\n"
);

fprintf (stream, "Step 1: Detect SNP sites (generates inputs files for step 2)\n");
fprintf (stream, "gubbins alignment_file\n\n", program_name);
fprintf (stream, "Step 2: Detect recombinations\n");
fprintf (stream, "gubbins -r -v vcf_file -t newick_tree -f original.aln -m 10 alignment_file\n\n", program_name);
exit (exit_code);
}

Expand All @@ -64,6 +59,7 @@ int check_file_exists_or_exit(char * filename)
} else {
printf("Error: File '%s' doesnt exist\n",filename);
print_usage(stderr, EXIT_FAILURE);
return 0;
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/parse_phylip.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ int get_internal_node(int sequence_index);
void fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap(int parent_sequence_index, int * child_sequence_indices, int num_children);
void fill_in_unambiguous_gaps_in_parent_from_children(int parent_sequence_index, int * child_sequence_indices, int num_children);
void freeup_memory();
void set_number_of_bases_in_recombinations(char * sample_name, int bases_in_recombinations);
void filter_sequence_bases_and_rotate(char * reference_bases, char ** filtered_bases_for_snps, int number_of_filtered_snps);

#define MAX_READ_BUFFER 65536
#define MAX_SAMPLE_NAME_SIZE 1024
Expand Down
1 change: 1 addition & 0 deletions src/seqUtil.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#define __SEQUTIL_C__

#include "seqUtil.h"
#include "string_cat.h"

/*
* Yu-Wei Wu http://yuweibioinfo.blogspot.com/2008/10/newick-tree-parser-in-c-make-use-of.html
Expand Down
1 change: 1 addition & 0 deletions src/seqUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "string_cat.h"

/*
* Yu-Wei Wu http://yuweibioinfo.blogspot.com/2008/10/newick-tree-parser-in-c-make-use-of.html
Expand Down
1 change: 1 addition & 0 deletions src/snp_searching.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "snp_searching.h"

// Most of the methods in this file look the same, so should be DRYed out.
Expand Down
2 changes: 2 additions & 0 deletions src/snp_searching.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,7 @@ int advance_window_start_to_next_snp_with_start_index(int window_start_coordinat
int rewind_window_end_to_last_snp_with_start_end_index(int window_end_coordinate, int * snp_locations, char * child_sequence, int number_of_branch_snps, int start_index,int end_index);
int find_number_of_snps_in_block_with_start_end_index(int window_start_coordinate, int window_end_coordinate, int * snp_locations, char * child_sequence, int number_of_snps, int start_index,int end_index);
int get_window_end_coordinates_excluding_gaps_with_start_end_index(int window_start_coordinate, int window_size, int * snp_locations, char * child_sequence, int number_of_snps, int start_index,int end_index);
int calculate_block_size_without_gaps_with_start_end_index(char * child_sequence, int * snp_locations, int starting_coordinate, int ending_coordinate, int length_of_original_genome, int start_index,int end_index);


#endif
1 change: 1 addition & 0 deletions src/snp_sites.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "phylip_of_snp_sites.h"
#include "parse_phylip.h"
#include "string_cat.h"
#include "fasta_of_snp_sites.h"


void build_snp_locations(int snp_locations[], char reference_sequence[])
Expand Down
2 changes: 1 addition & 1 deletion src/vcf.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ void output_vcf_row_samples_bases(FILE * vcf_file_pointer, char reference_base,
{
continue;
}
if((bases_for_snp[i] == reference_base))
if(bases_for_snp[i] == reference_base)
{
fprintf( vcf_file_pointer, "%c", (char) reference_base );
}
Expand Down
2 changes: 1 addition & 1 deletion tests/data/one_recombination.tre.expected.gff
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
##gff-version 3
##sequence-region SEQUENCE 1 135
SEQUENCE GUBBINS CDS 63 167 0.000 . 0 node="N5->sequence_2";taxa="sequence_2";snp_count="103"
SEQUENCE GUBBINS CDS 63 167 0.000 . 0 node="N5->sequence_2";taxa="sequence_2";snp_count="103";

0 comments on commit 4b7486a

Please sign in to comment.