Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rho theta #119

Merged
merged 4 commits into from
Dec 8, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.0.6
1.0.7
6 changes: 6 additions & 0 deletions debian/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
gubbins (1.0.7~trusty1) trusty; urgency=low

* Update rho/theta calculation to blocks/snps outside recombinations

-- Andrew Page <ap13@sanger.ac.uk> Mon, 8 Dec 2014 12:58:00 +0000

gubbins (1.0.6~trusty1) trusty; urgency=low

* Drawer allow for variation tag
Expand Down
16 changes: 12 additions & 4 deletions python/gubbins/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from Bio.Align import MultipleSeqAlignment
from Bio.Seq import Seq
from cStringIO import StringIO
from cpuinfo import cpuinfo
import shutil

class GubbinsError(Exception):
Expand Down Expand Up @@ -85,7 +86,14 @@ def does_file_exist(alignment_filename, file_type_msg):

@staticmethod
def choose_executable(list_of_executables):
processorinfo = cpuinfo.get_cpu_info()

for executable in list_of_executables:
if re.search('AVX', executable) and 'avx' not in processorinfo['flags']:
continue
elif re.search('SSE3', executable) and 'ssse3' not in processorinfo['flags']:
continue

if GubbinsCommon.which(executable) != None:
return executable

Expand All @@ -103,7 +111,7 @@ def parse_and_run(self):
self.args.threads = 2
raxml_executables = ['raxmlHPC-PTHREADS-AVX','raxmlHPC-PTHREADS-SSE3','raxmlHPC-PTHREADS']
print "Trying PTHREADS version of raxml because no single threaded version of raxml could be found. Just to warn you, this requires 2 threads.\n"
raxml_executable = GubbinsCommon.choose_raxml_executable(raxml_executables)
raxml_executable = GubbinsCommon.choose_executable(raxml_executables)

RAXML_EXEC = raxml_executable+' -f d -p 1 -m GTRGAMMA'
if re.search('PTHREADS', str(RAXML_EXEC)) != None:
Expand Down Expand Up @@ -907,7 +915,7 @@ def extract_recombinations_from_embl(filename):
if searchObj != None:
start_coord = int(searchObj.group(1))
end_coord = int(searchObj.group(2))
next
continue

if start_coord >= 0 and end_coord >= 0:
searchTaxa = re.search('taxa\=\"([^"]+)\"', line)
Expand All @@ -921,7 +929,7 @@ def extract_recombinations_from_embl(filename):

start_coord = -1
end_coord = -1
next
continue
fh.close()
return sequences_to_coords

Expand All @@ -933,7 +941,7 @@ def have_recombinations_been_seen_before(current_file, previous_files = []):

for previous_file in previous_files:
if not os.path.exists(previous_file):
next
continue
previous_file_recombinations = GubbinsCommon.extract_recombinations_from_embl(previous_file)
if current_file_recombinations == previous_file_recombinations:
return 1
Expand Down
2 changes: 1 addition & 1 deletion python/gubbins/tests/test_external_dependancies.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def test_recombination_convergence(self):
gubbins_runner = common.GubbinsCommon(parser.parse_args(["--converge_method", "recombination", "--iterations", '15','--no_cleanup', 'gubbins/tests/data/multiple_recombinations.aln']))
gubbins_runner.parse_and_run()

assert common.GubbinsCommon.have_recombinations_been_seen_before('multiple_recombinations.recombination_predictions.embl', ['RAxML_result.multiple_recombinations.iteration_1.tab','RAxML_result.multiple_recombinations.iteration_2.tab','RAxML_result.multiple_recombinations.iteration_3.tab','RAxML_result.multiple_recombinations.iteration_4.tab','RAxML_result.multiple_recombinations.iteration_5.tab','RAxML_result.multiple_recombinations.iteration_6.tab','RAxML_result.multiple_recombinations.iteration_7.tab','RAxML_result.multiple_recombinations.iteration_8.tab']) == 1
assert common.GubbinsCommon.have_recombinations_been_seen_before('multiple_recombinations.recombination_predictions.embl', ['RAxML_result.multiple_recombinations.iteration_1.tab','RAxML_result.multiple_recombinations.iteration_2.tab','RAxML_result.multiple_recombinations.iteration_3.tab','RAxML_result.multiple_recombinations.iteration_4.tab','RAxML_result.multiple_recombinations.iteration_5.tab','RAxML_result.multiple_recombinations.iteration_6.tab','RAxML_result.multiple_recombinations.iteration_7.tab','RAxML_result.multiple_recombinations.iteration_8.tab','RAxML_result.multiple_recombinations.iteration_9.tab','RAxML_result.multiple_recombinations.iteration_10.tab','RAxML_result.multiple_recombinations.iteration_11.tab','RAxML_result.multiple_recombinations.iteration_12.tab','RAxML_result.multiple_recombinations.iteration_13.tab','RAxML_result.multiple_recombinations.iteration_14.tab','RAxML_result.multiple_recombinations.iteration_15.tab']) == 1

os.remove('log.txt')
r = glob.glob("RAxML_*")
Expand Down
3 changes: 2 additions & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
'Biopython >= 1.59',
'DendroPy >= 3.12.0',
'Reportlab >= 2.5',
'nose >= 1.3'
'nose >= 1.3',
'py-cpuinfo >= 0.1.2'
],
license='GPL',
)
18 changes: 2 additions & 16 deletions src/tree_statistics.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ void create_tree_statistics_file(char filename[], sample_statistics ** statistic
fprintf( file_pointer, "%i\t", sample_details->number_of_blocks);
fprintf( file_pointer, "%i\t", sample_details->bases_in_recombinations);
fprintf( file_pointer, "%f\t", recombination_to_mutation_ratio(sample_details->number_of_recombinations, (sample_details->number_of_snps)));
fprintf( file_pointer, "%f\t", recombination_blocks_to_mutation_ratio(sample_details->number_of_blocks,estimate_snps_genome_would_have_without_recombinations(sample_details->number_of_snps, sample_details->genome_length_without_gaps,sample_details->bases_in_recombinations)));
fprintf( file_pointer, "%f\t", rho_theta(sample_details->number_of_blocks,sample_details->number_of_snps));
fprintf( file_pointer, "%i", sample_details->genome_length_without_gaps);

fprintf( file_pointer, "\n");
Expand All @@ -71,7 +71,7 @@ float recombination_to_mutation_ratio(int number_of_recombinations, int number_o
return (number_of_recombinations*1.0)/(number_of_snps*1.0);
}

float recombination_blocks_to_mutation_ratio(int number_of_blocks, int number_of_snps)
float rho_theta(int number_of_blocks, int number_of_snps)
{
if(number_of_snps == 0 || number_of_blocks == 0)
{
Expand All @@ -80,18 +80,4 @@ float recombination_blocks_to_mutation_ratio(int number_of_blocks, int number_of
return (number_of_blocks*1.0)/(number_of_snps*1.0);
}

int estimate_snps_genome_would_have_without_recombinations(int number_of_snps_outside_recomb, int genome_length_without_gaps,int bases_in_recombinations )
{
if(genome_length_without_gaps == 0 || genome_length_without_gaps - bases_in_recombinations == 0)
{
return 0;
}
if(bases_in_recombinations == 0)
{
return number_of_snps_outside_recomb;
}
return (int) (number_of_snps_outside_recomb*genome_length_without_gaps)/(genome_length_without_gaps - bases_in_recombinations);
}



2 changes: 1 addition & 1 deletion src/tree_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

void create_tree_statistics_file(char filename[], sample_statistics ** statistics_for_samples, int number_of_samples);
float recombination_to_mutation_ratio(int number_of_recombinations, int number_of_snps);
float recombination_blocks_to_mutation_ratio(int number_of_blocks, int number_of_snps);
float rho_theta(int number_of_blocks, int number_of_snps);
#define MAX_FILE_NAME_SIZE 1024

#endif
Expand Down