From 296af56675cf2d511e22ceb39ea6bfc704928adb Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 8 Dec 2014 12:59:19 +0000 Subject: [PATCH 1/4] update rho theta calculation --- VERSION | 2 +- debian/changelog | 6 ++++++ src/tree_statistics.c | 18 ++---------------- src/tree_statistics.h | 2 +- 4 files changed, 10 insertions(+), 18 deletions(-) diff --git a/VERSION b/VERSION index ece61c60..f9cbc01a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.0.6 \ No newline at end of file +1.0.7 \ No newline at end of file diff --git a/debian/changelog b/debian/changelog index 89eef9d9..542572da 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +gubbins (1.0.7~trusty1) trusty; urgency=low + + * Update rho/theta calculation to blocks/snps outside recombinations + + -- Andrew Page Mon, 8 Dec 2014 12:58:00 +0000 + gubbins (1.0.6~trusty1) trusty; urgency=low * Drawer allow for variation tag diff --git a/src/tree_statistics.c b/src/tree_statistics.c index cf5b2282..8fb86244 100644 --- a/src/tree_statistics.c +++ b/src/tree_statistics.c @@ -47,7 +47,7 @@ void create_tree_statistics_file(char filename[], sample_statistics ** statistic fprintf( file_pointer, "%i\t", sample_details->number_of_blocks); fprintf( file_pointer, "%i\t", sample_details->bases_in_recombinations); fprintf( file_pointer, "%f\t", recombination_to_mutation_ratio(sample_details->number_of_recombinations, (sample_details->number_of_snps))); - fprintf( file_pointer, "%f\t", recombination_blocks_to_mutation_ratio(sample_details->number_of_blocks,estimate_snps_genome_would_have_without_recombinations(sample_details->number_of_snps, sample_details->genome_length_without_gaps,sample_details->bases_in_recombinations))); + fprintf( file_pointer, "%f\t", rho_theta(sample_details->number_of_blocks,sample_details->number_of_snps)); fprintf( file_pointer, "%i", sample_details->genome_length_without_gaps); fprintf( file_pointer, "\n"); @@ -71,7 +71,7 @@ float recombination_to_mutation_ratio(int number_of_recombinations, int number_o return (number_of_recombinations*1.0)/(number_of_snps*1.0); } -float recombination_blocks_to_mutation_ratio(int number_of_blocks, int number_of_snps) +float rho_theta(int number_of_blocks, int number_of_snps) { if(number_of_snps == 0 || number_of_blocks == 0) { @@ -80,18 +80,4 @@ float recombination_blocks_to_mutation_ratio(int number_of_blocks, int number_of return (number_of_blocks*1.0)/(number_of_snps*1.0); } -int estimate_snps_genome_would_have_without_recombinations(int number_of_snps_outside_recomb, int genome_length_without_gaps,int bases_in_recombinations ) -{ - if(genome_length_without_gaps == 0 || genome_length_without_gaps - bases_in_recombinations == 0) - { - return 0; - } - if(bases_in_recombinations == 0) - { - return number_of_snps_outside_recomb; - } - return (int) (number_of_snps_outside_recomb*genome_length_without_gaps)/(genome_length_without_gaps - bases_in_recombinations); -} - - diff --git a/src/tree_statistics.h b/src/tree_statistics.h index d756f3f8..c49f5078 100644 --- a/src/tree_statistics.h +++ b/src/tree_statistics.h @@ -22,7 +22,7 @@ void create_tree_statistics_file(char filename[], sample_statistics ** statistics_for_samples, int number_of_samples); float recombination_to_mutation_ratio(int number_of_recombinations, int number_of_snps); - float recombination_blocks_to_mutation_ratio(int number_of_blocks, int number_of_snps); + float rho_theta(int number_of_blocks, int number_of_snps); #define MAX_FILE_NAME_SIZE 1024 #endif From c9174d1a31477a9d44cc289401d8ef34afb3db82 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 8 Dec 2014 14:03:14 +0000 Subject: [PATCH 2/4] check cpu flags for raxml --- python/gubbins/common.py | 10 +++++++++- python/setup.py | 3 ++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/python/gubbins/common.py b/python/gubbins/common.py index d609b6b8..96a3c364 100644 --- a/python/gubbins/common.py +++ b/python/gubbins/common.py @@ -32,6 +32,7 @@ from Bio.Align import MultipleSeqAlignment from Bio.Seq import Seq from cStringIO import StringIO +from cpuinfo import cpuinfo import shutil class GubbinsError(Exception): @@ -85,7 +86,14 @@ def does_file_exist(alignment_filename, file_type_msg): @staticmethod def choose_executable(list_of_executables): + processorinfo = cpuinfo.get_cpu_info() + for executable in list_of_executables: + if re.search('AVX', executable) and 'avx' not in processorinfo['flags']: + next + elif re.search('SSE3', executable) and 'ssse3' not in processorinfo['flags']: + next + if GubbinsCommon.which(executable) != None: return executable @@ -103,7 +111,7 @@ def parse_and_run(self): self.args.threads = 2 raxml_executables = ['raxmlHPC-PTHREADS-AVX','raxmlHPC-PTHREADS-SSE3','raxmlHPC-PTHREADS'] print "Trying PTHREADS version of raxml because no single threaded version of raxml could be found. Just to warn you, this requires 2 threads.\n" - raxml_executable = GubbinsCommon.choose_raxml_executable(raxml_executables) + raxml_executable = GubbinsCommon.choose_executable(raxml_executables) RAXML_EXEC = raxml_executable+' -f d -p 1 -m GTRGAMMA' if re.search('PTHREADS', str(RAXML_EXEC)) != None: diff --git a/python/setup.py b/python/setup.py index fbd99a0a..daaeb5be 100755 --- a/python/setup.py +++ b/python/setup.py @@ -33,7 +33,8 @@ 'Biopython >= 1.59', 'DendroPy >= 3.12.0', 'Reportlab >= 2.5', - 'nose >= 1.3' + 'nose >= 1.3', + 'py-cpuinfo >= 0.1.2' ], license='GPL', ) From c26a3d9c6a42addbab952d332ca7e1f3a8003c6b Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 8 Dec 2014 14:22:57 +0000 Subject: [PATCH 3/4] use continue rather than next --- python/gubbins/common.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/gubbins/common.py b/python/gubbins/common.py index 96a3c364..eb2e3809 100644 --- a/python/gubbins/common.py +++ b/python/gubbins/common.py @@ -90,9 +90,9 @@ def choose_executable(list_of_executables): for executable in list_of_executables: if re.search('AVX', executable) and 'avx' not in processorinfo['flags']: - next - elif re.search('SSE3', executable) and 'ssse3' not in processorinfo['flags']: - next + continue + elif re.search('SSE3', executable) and 'ssse3' not in processorinfo['flags']: + continue if GubbinsCommon.which(executable) != None: return executable @@ -915,7 +915,7 @@ def extract_recombinations_from_embl(filename): if searchObj != None: start_coord = int(searchObj.group(1)) end_coord = int(searchObj.group(2)) - next + continue if start_coord >= 0 and end_coord >= 0: searchTaxa = re.search('taxa\=\"([^"]+)\"', line) @@ -929,7 +929,7 @@ def extract_recombinations_from_embl(filename): start_coord = -1 end_coord = -1 - next + continue fh.close() return sequences_to_coords @@ -941,7 +941,7 @@ def have_recombinations_been_seen_before(current_file, previous_files = []): for previous_file in previous_files: if not os.path.exists(previous_file): - next + continue previous_file_recombinations = GubbinsCommon.extract_recombinations_from_embl(previous_file) if current_file_recombinations == previous_file_recombinations: return 1 From 6a054d2ce6becb0af305ac209c08494bc1c738ff Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 8 Dec 2014 14:29:41 +0000 Subject: [PATCH 4/4] check more iterations in test for prev seen recombinations --- python/gubbins/tests/test_external_dependancies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/gubbins/tests/test_external_dependancies.py b/python/gubbins/tests/test_external_dependancies.py index 33ed5bef..f3f2379c 100644 --- a/python/gubbins/tests/test_external_dependancies.py +++ b/python/gubbins/tests/test_external_dependancies.py @@ -78,7 +78,7 @@ def test_recombination_convergence(self): gubbins_runner = common.GubbinsCommon(parser.parse_args(["--converge_method", "recombination", "--iterations", '15','--no_cleanup', 'gubbins/tests/data/multiple_recombinations.aln'])) gubbins_runner.parse_and_run() - assert common.GubbinsCommon.have_recombinations_been_seen_before('multiple_recombinations.recombination_predictions.embl', ['RAxML_result.multiple_recombinations.iteration_1.tab','RAxML_result.multiple_recombinations.iteration_2.tab','RAxML_result.multiple_recombinations.iteration_3.tab','RAxML_result.multiple_recombinations.iteration_4.tab','RAxML_result.multiple_recombinations.iteration_5.tab','RAxML_result.multiple_recombinations.iteration_6.tab','RAxML_result.multiple_recombinations.iteration_7.tab','RAxML_result.multiple_recombinations.iteration_8.tab']) == 1 + assert common.GubbinsCommon.have_recombinations_been_seen_before('multiple_recombinations.recombination_predictions.embl', ['RAxML_result.multiple_recombinations.iteration_1.tab','RAxML_result.multiple_recombinations.iteration_2.tab','RAxML_result.multiple_recombinations.iteration_3.tab','RAxML_result.multiple_recombinations.iteration_4.tab','RAxML_result.multiple_recombinations.iteration_5.tab','RAxML_result.multiple_recombinations.iteration_6.tab','RAxML_result.multiple_recombinations.iteration_7.tab','RAxML_result.multiple_recombinations.iteration_8.tab','RAxML_result.multiple_recombinations.iteration_9.tab','RAxML_result.multiple_recombinations.iteration_10.tab','RAxML_result.multiple_recombinations.iteration_11.tab','RAxML_result.multiple_recombinations.iteration_12.tab','RAxML_result.multiple_recombinations.iteration_13.tab','RAxML_result.multiple_recombinations.iteration_14.tab','RAxML_result.multiple_recombinations.iteration_15.tab']) == 1 os.remove('log.txt') r = glob.glob("RAxML_*")