Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update window creation #75

Merged
merged 9 commits into from
May 20, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
298 changes: 126 additions & 172 deletions src/branch_sequences.c

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions src/branch_sequences.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
char *generate_branch_sequences(newick_node *root, FILE *vcf_file_pointer,int * snp_locations, int number_of_snps, char** column_names, int number_of_columns, char * leaf_sequence, int length_of_original_genome, FILE * block_file_pointer, FILE * gff_file_pointer,int min_snps, FILE * branch_snps_file_pointer);
void identify_recombinations(int number_of_branch_snps, int * branches_snp_sites,int length_of_original_genome);
double calculate_snp_density(int * branches_snp_sites, int number_of_branch_snps, int index);
void get_likelihood_for_windows(char * child_sequence, int length_of_sequence, int * snp_site_coords, int branch_genome_size, int number_of_branch_snps, int * snp_locations, newick_node * current_node, FILE * block_file_pointer, newick_node *root, char * branch_snp_sequence, FILE * gff_file_pointer,int min_snps);
void get_likelihood_for_windows(char * child_sequence, int length_of_sequence, int * snp_site_coords, int branch_genome_size, int number_of_branch_snps, int * snp_locations, newick_node * current_node, FILE * block_file_pointer, newick_node *root, char * branch_snp_sequence, FILE * gff_file_pointer,int min_snps, int length_of_original_genome);
double get_block_likelihood(int branch_genome_size, int number_of_branch_snps, int block_genome_size_without_gaps, int number_of_block_snps);
int calculate_window_size(int branch_genome_size, int number_of_branch_snps);
int calculate_window_size(int branch_genome_size, int number_of_branch_snps);
Expand All @@ -35,16 +35,16 @@ void fill_in_recombinations_with_gaps(newick_node *root, int * parent_recombinat
int copy_and_concat_integer_arrays(int * array_1, int array_1_size, int * array_2, int array_2_size, int * output_array);
double snp_density(int length_of_sequence, int number_of_snps);
int calculate_cutoff(int branch_genome_size, int window_size, int num_branch_snps);
int merge_adjacent_blocks(int ** block_coordinates, int number_of_blocks, char * branch_snp_sequence, int number_of_bases, int * snp_site_coords);
int get_smallest_log_likelihood(int ** candidate_blocks, int number_of_candidate_blocks);
int merge_adjacent_blocks(int ** block_coordinates, int number_of_blocks, char * branch_snp_sequence, int number_of_bases, int * snp_site_coords, double * block_likelihoods);
int get_smallest_log_likelihood(double * candidate_blocks, int number_of_candidate_blocks);
int exclude_snp_sites_in_block(int window_start_coordinate, int window_end_coordinate, int * snp_site_coords, int number_of_branch_snps);
int flag_smallest_log_likelihood_recombinations(int ** candidate_blocks, int number_of_candidate_blocks, int number_of_branch_snps, int * snp_site_coords, int * recombinations, int number_of_recombinations,newick_node * current_node, FILE * block_file_pointer, newick_node *root,int * snp_locations, int total_num_snps, FILE * gff_file_pointer);
int flag_smallest_log_likelihood_recombinations(int ** candidate_blocks, int number_of_candidate_blocks, int number_of_branch_snps, int * snp_site_coords, int * recombinations, int number_of_recombinations,newick_node * current_node, FILE * block_file_pointer, newick_node *root,int * snp_locations, int total_num_snps, FILE * gff_file_pointer, double * block_likelihooods);
int extend_end_of_block_left_over_gap(int block_coord, char * branch_snp_sequence, int number_of_bases, int * snp_site_coords);
int extend_end_of_block_right_over_gap(int block_coord, char * branch_snp_sequence, int number_of_bases, int * snp_site_coords);
int calculate_number_of_bases_in_recombations_excluding_gaps(int ** block_coordinates, int num_blocks,char * child_sequence, int * snp_locations,int length_of_original_genome);
void carry_unambiguous_gaps_up_tree(newick_node *root);
void move_blocks_inwards_while_likelihood_improves(int number_of_blocks,int ** block_coordinates, int min_snps, int * snp_site_coords, int number_of_branch_snps,char * branch_snp_sequence, int * snp_locations, int branch_genome_size,char * child_sequence, int length_of_sequence);

void move_blocks_inwards_while_likelihood_improves(int number_of_blocks,int ** block_coordinates, int min_snps, int * snp_site_coords, int number_of_branch_snps,char * branch_snp_sequence, int * snp_locations, int branch_genome_size,char * child_sequence, int length_of_sequence, double * block_likelihoods, int cutoff_value);
int get_blocks(int ** block_coordinates, int branch_genome_size,int * snp_site_coords,int number_of_branch_snps, int window_size, int cutoff);

#define WINDOW_SNP_MODE_TARGET 10
#define MIN_WINDOW_SIZE 100
Expand Down
44 changes: 34 additions & 10 deletions tests/check_branch_sequences.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,13 @@
//merge_adjacent_blocks(int ** block_coordinates, int number_of_blocks)
START_TEST (check_merge_adjacent_blocks_not_adjacent)
{
int * blocks_not_adjacent[2];
int * blocks_not_adjacent[4];
blocks_not_adjacent[0] = (int *) malloc((3)*sizeof(int));
blocks_not_adjacent[1] = (int *) malloc((3)*sizeof(int));
blocks_not_adjacent[2] = (int *) malloc((3)*sizeof(int));
blocks_not_adjacent[3] = (int *) malloc((3)*sizeof(int));
double block_likelihood[4];

blocks_not_adjacent[0][0] = 10;
blocks_not_adjacent[1][0] = 20;
blocks_not_adjacent[0][1] = 1000;
Expand All @@ -23,7 +27,7 @@ START_TEST (check_merge_adjacent_blocks_not_adjacent)
int snp_site_coords[9] = {10};
int number_of_bases = 1;

fail_unless(merge_adjacent_blocks(blocks_not_adjacent,2, branch_sequence,number_of_bases,snp_site_coords) == 2);
fail_unless(merge_adjacent_blocks(blocks_not_adjacent,2, branch_sequence,number_of_bases,snp_site_coords,block_likelihood) == 2);
fail_unless(blocks_not_adjacent[0][0] == 10 );
fail_unless(blocks_not_adjacent[1][0] == 20 );
fail_unless(blocks_not_adjacent[0][1] == 1000);
Expand All @@ -34,19 +38,25 @@ END_TEST
START_TEST (check_merge_adjacent_blocks_beside_each_other)
{

int * blocks_beside_each_other[2];
int * blocks_beside_each_other[4];
blocks_beside_each_other[0] = (int *) malloc((3)*sizeof(int));
blocks_beside_each_other[1] = (int *) malloc((3)*sizeof(int));
blocks_beside_each_other[2] = (int *) malloc((3)*sizeof(int));
blocks_beside_each_other[3] = (int *) malloc((3)*sizeof(int));
blocks_beside_each_other[0][0] = 10;
blocks_beside_each_other[1][0] = 20;
blocks_beside_each_other[0][1] = 20;
blocks_beside_each_other[1][1] = 30;

double * block_likelihood;
block_likelihood = (double *) malloc((4+1)*sizeof(double));


char *branch_sequence = "A";
int snp_site_coords[9] = {10};
int number_of_bases = 1;

fail_unless(merge_adjacent_blocks(blocks_beside_each_other,2, branch_sequence,number_of_bases,snp_site_coords) == 1);
fail_unless(merge_adjacent_blocks(blocks_beside_each_other,2, branch_sequence,number_of_bases,snp_site_coords,block_likelihood) == 1);
fail_unless(blocks_beside_each_other[0][0] == 10 );
fail_unless(blocks_beside_each_other[1][0] == 30 );
fail_unless(blocks_beside_each_other[0][1] == 0);
Expand All @@ -56,19 +66,24 @@ END_TEST

START_TEST (check_merge_adjacent_blocks_near_each_other)
{
int * blocks_near_each_other[2];
int * blocks_near_each_other[4];
blocks_near_each_other[0] = (int *) malloc((3)*sizeof(int));
blocks_near_each_other[1] = (int *) malloc((3)*sizeof(int));
blocks_near_each_other[2] = (int *) malloc((3)*sizeof(int));
blocks_near_each_other[3] = (int *) malloc((3)*sizeof(int));
blocks_near_each_other[0][0] = 10;
blocks_near_each_other[1][0] = 20;
blocks_near_each_other[0][1] = 21;
blocks_near_each_other[1][1] = 30;

double * block_likelihood;
block_likelihood = (double *) malloc((4+1)*sizeof(double));

char *branch_sequence = "A";
int snp_site_coords[9] = {10};
int number_of_bases = 1;

fail_unless(merge_adjacent_blocks(blocks_near_each_other,2, branch_sequence,number_of_bases,snp_site_coords) == 1);
fail_unless(merge_adjacent_blocks(blocks_near_each_other,2, branch_sequence,number_of_bases,snp_site_coords,block_likelihood) == 1);
fail_unless(blocks_near_each_other[0][0] == 10 );
fail_unless(blocks_near_each_other[1][0] == 30 );
fail_unless(blocks_near_each_other[0][1] == 0);
Expand All @@ -78,9 +93,11 @@ END_TEST

START_TEST (check_merge_adjacent_blocks_overlapping)
{
int * blocks_overlapping[2];
int * blocks_overlapping[4];
blocks_overlapping[0] = (int *) malloc((3)*sizeof(int));
blocks_overlapping[1] = (int *) malloc((3)*sizeof(int));
blocks_overlapping[2] = (int *) malloc((3)*sizeof(int));
blocks_overlapping[3] = (int *) malloc((3)*sizeof(int));
blocks_overlapping[0][0] = 10;
blocks_overlapping[1][0] = 20;
blocks_overlapping[0][1] = 19;
Expand All @@ -90,7 +107,10 @@ START_TEST (check_merge_adjacent_blocks_overlapping)
int snp_site_coords[9] = {10};
int number_of_bases = 1;

fail_unless(merge_adjacent_blocks(blocks_overlapping,2, branch_sequence,number_of_bases,snp_site_coords) == 1);
double * block_likelihood;
block_likelihood = (double *) malloc((4+1)*sizeof(double));

fail_unless(merge_adjacent_blocks(blocks_overlapping,2, branch_sequence,number_of_bases,snp_site_coords,block_likelihood) == 1);
fail_unless(blocks_overlapping[0][0] == 10 );
fail_unless(blocks_overlapping[1][0] == 30 );
fail_unless(blocks_overlapping[0][1] == 0);
Expand All @@ -102,9 +122,11 @@ END_TEST

START_TEST (check_merge_block_straddling_gap)
{
int * blocks_overlapping[2];
int * blocks_overlapping[4];
blocks_overlapping[0] = (int *) malloc((3)*sizeof(int));
blocks_overlapping[1] = (int *) malloc((3)*sizeof(int));
blocks_overlapping[2] = (int *) malloc((3)*sizeof(int));
blocks_overlapping[3] = (int *) malloc((3)*sizeof(int));
blocks_overlapping[0][0] = 10;
blocks_overlapping[1][0] = 40;
blocks_overlapping[0][1] = 44;
Expand All @@ -113,8 +135,10 @@ START_TEST (check_merge_block_straddling_gap)
char *branch_sequence = "AAA---CCC";
int snp_site_coords[9] = {10,30,40,41,42,43,44,60,70};
int number_of_bases = 9;
double * block_likelihood;
block_likelihood = (double *) malloc((4+1)*sizeof(double));

fail_unless(merge_adjacent_blocks(blocks_overlapping,2, branch_sequence,number_of_bases,snp_site_coords) == 1);
fail_unless(merge_adjacent_blocks(blocks_overlapping,2, branch_sequence,number_of_bases,snp_site_coords,block_likelihood) == 1);

fail_unless(blocks_overlapping[0][0] == 10);
fail_unless(blocks_overlapping[1][0] == 70);
Expand Down
1 change: 1 addition & 0 deletions tests/data/multiple_recombinations.tre
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(sequence_3:0.000000,sequence_4:1.059462,(sequence_2:0.000000,(sequence_5:0.788739,(((sequence_7:0.000000,sequence_8:0.000000)N6:0.000000,(sequence_6:0.000000,sequence_1:3.286377)N7:41.232967)N5:0.138024,(sequence_10:1065.431519,sequence_9:0.067308)N8:1.130817)N4:191.396896)N3:1.270971)N2:1.060740)N1:0.000000;
Loading