Skip to content

Commit

Permalink
propagate unambiguous bases up the tree
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewjpage committed Mar 14, 2013
1 parent 80ea76d commit e85404f
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 5 deletions.
1 change: 1 addition & 0 deletions src/branch_sequences.c
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ void carry_unambiguous_gaps_up_tree(newick_node *root)

// compare the parent sequence to the each child sequence and update the gaps
fill_in_unambiguous_gaps_in_parent_from_children(parent_sequence_index, child_sequence_indices,child_counter);
fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap(parent_sequence_index, child_sequence_indices,child_counter);
}
}

Expand Down
35 changes: 35 additions & 0 deletions src/parse_phylip.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,41 @@ void fill_in_unambiguous_gaps_in_parent_from_children(int parent_sequence_index,
}
}

void fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap(int parent_sequence_index, int * child_sequence_indices, int num_children)
{
int snp_counter = 0;

for(snp_counter = 0; snp_counter < num_snps ; snp_counter++)
{
if(toupper(sequences[parent_sequence_index][snp_counter]) != 'N' && sequences[parent_sequence_index][snp_counter] != '-')
{
break;
}

int real_base_found = 0;
int child_counter = 0;
char comparison_base ;
for(child_counter = 0 ; child_counter < num_children ; child_counter++)
{
int child_index = child_sequence_indices[child_counter];
if(child_counter == 0)
{
comparison_base = toupper(sequences[child_index][snp_counter]);
}

if(comparison_base != toupper(sequences[child_index][snp_counter]) )
{
break;
}
}

if(toupper(sequences[parent_sequence_index][snp_counter]) != comparison_base)
{
sequences[parent_sequence_index][snp_counter] = comparison_base;
}
}
}

int does_column_contain_snps(int snp_column, char reference_base)
{
int i;
Expand Down
2 changes: 2 additions & 0 deletions src/parse_phylip.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ void load_sequences_from_multifasta_file(char filename[]);
void set_internal_node(int internal_node_value,int sequence_index);
void initialise_internal_node();
int get_internal_node(int sequence_index);
void fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap(int parent_sequence_index, int * child_sequence_indices, int num_children);
void fill_in_unambiguous_gaps_in_parent_from_children(int parent_sequence_index, int * child_sequence_indices, int num_children);

#define MAX_READ_BUFFER 65536
#define MAX_SAMPLE_NAME_SIZE 1024
Expand Down
14 changes: 14 additions & 0 deletions tests/check_parse_phylip.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,26 @@ START_TEST (phylip_read_in_file_with_gaps)
}
END_TEST

START_TEST (phylip_fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap)
{
load_sequences_from_multifasta_file("../tests/data/alignment_with_gap_in_parent.aln");
int child_indices[2] = {0,1};
fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap(2, child_indices, 2);
char sequence_bases[10];
get_sequence_for_sample_name(sequence_bases, "parent");
fail_unless( strcmp(sequence_bases, "AC--") == 0);
}
END_TEST



Suite * parse_phylip_suite(void)
{
Suite *s = suite_create ("Parsing a phylip file");
TCase *tc_phylip = tcase_create ("phylip_files");
tcase_add_test (tc_phylip, phylip_read_in_small_file);
tcase_add_test (tc_phylip, phylip_read_in_file_with_gaps);
tcase_add_test (tc_phylip, phylip_fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap);
suite_add_tcase (s, tc_phylip);
return s;
}
10 changes: 5 additions & 5 deletions tests/data/one_recombination.expected.vcf
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
##fileformat=VCFv4.1
##FORMAT=<ID=AB,Number=1,Type=String,Description="Alt Base">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sequence_1 sequence_2 sequence_3 sequence_4 sequence_5 sequence_6 sequence_7 sequence_8 sequence_9 sequence_10
1 1 . A C,-,N . . . AB A C - - - - - - - A
1 6 . A -,C,N . . . AB A - C - - - - - - A
1 8 . A -,C,N . . . AB A - C - - - - - - A
1 9 . A -,C,N . . . AB A - - C - - - - - A
1 11 . A -,C,N . . . AB A - - C - - - - - A
1 1 . A C,- . . . AB A C - - - - - - - A
1 6 . A -,C . . . AB A - C - - - - - - A
1 8 . A -,C . . . AB A - C - - - - - - A
1 9 . A -,C . . . AB A - - C - - - - - A
1 11 . A -,C . . . AB A - - C - - - - - A
1 12 . A -,C,N . . . AB A - - - C - - - - A
1 13 . A C,-,N . . . AB A C - - - - - - - A
1 14 . A -,C,N . . . AB A - - - C - - - - A
Expand Down

0 comments on commit e85404f

Please sign in to comment.