diff --git a/src/branch_sequences.c b/src/branch_sequences.c index a4c775ee..0554e107 100644 --- a/src/branch_sequences.c +++ b/src/branch_sequences.c @@ -216,6 +216,7 @@ void carry_unambiguous_gaps_up_tree(newick_node *root) // compare the parent sequence to the each child sequence and update the gaps fill_in_unambiguous_gaps_in_parent_from_children(parent_sequence_index, child_sequence_indices,child_counter); + fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap(parent_sequence_index, child_sequence_indices,child_counter); } } diff --git a/src/parse_phylip.c b/src/parse_phylip.c index 8f4423a8..533da8c4 100644 --- a/src/parse_phylip.c +++ b/src/parse_phylip.c @@ -90,6 +90,41 @@ void fill_in_unambiguous_gaps_in_parent_from_children(int parent_sequence_index, } } +void fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap(int parent_sequence_index, int * child_sequence_indices, int num_children) +{ + int snp_counter = 0; + + for(snp_counter = 0; snp_counter < num_snps ; snp_counter++) + { + if(toupper(sequences[parent_sequence_index][snp_counter]) != 'N' && sequences[parent_sequence_index][snp_counter] != '-') + { + break; + } + + int real_base_found = 0; + int child_counter = 0; + char comparison_base ; + for(child_counter = 0 ; child_counter < num_children ; child_counter++) + { + int child_index = child_sequence_indices[child_counter]; + if(child_counter == 0) + { + comparison_base = toupper(sequences[child_index][snp_counter]); + } + + if(comparison_base != toupper(sequences[child_index][snp_counter]) ) + { + break; + } + } + + if(toupper(sequences[parent_sequence_index][snp_counter]) != comparison_base) + { + sequences[parent_sequence_index][snp_counter] = comparison_base; + } + } +} + int does_column_contain_snps(int snp_column, char reference_base) { int i; diff --git a/src/parse_phylip.h b/src/parse_phylip.h index 57d9a616..02938c60 100644 --- a/src/parse_phylip.h +++ b/src/parse_phylip.h @@ -47,6 +47,8 @@ void load_sequences_from_multifasta_file(char filename[]); void set_internal_node(int internal_node_value,int sequence_index); void initialise_internal_node(); int get_internal_node(int sequence_index); +void fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap(int parent_sequence_index, int * child_sequence_indices, int num_children); +void fill_in_unambiguous_gaps_in_parent_from_children(int parent_sequence_index, int * child_sequence_indices, int num_children); #define MAX_READ_BUFFER 65536 #define MAX_SAMPLE_NAME_SIZE 1024 diff --git a/tests/check_parse_phylip.c b/tests/check_parse_phylip.c index a58c3a44..86f0d717 100644 --- a/tests/check_parse_phylip.c +++ b/tests/check_parse_phylip.c @@ -69,12 +69,26 @@ START_TEST (phylip_read_in_file_with_gaps) } END_TEST +START_TEST (phylip_fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap) +{ + load_sequences_from_multifasta_file("../tests/data/alignment_with_gap_in_parent.aln"); + int child_indices[2] = {0,1}; + fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap(2, child_indices, 2); + char sequence_bases[10]; + get_sequence_for_sample_name(sequence_bases, "parent"); + fail_unless( strcmp(sequence_bases, "AC--") == 0); +} +END_TEST + + + Suite * parse_phylip_suite(void) { Suite *s = suite_create ("Parsing a phylip file"); TCase *tc_phylip = tcase_create ("phylip_files"); tcase_add_test (tc_phylip, phylip_read_in_small_file); tcase_add_test (tc_phylip, phylip_read_in_file_with_gaps); + tcase_add_test (tc_phylip, phylip_fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap); suite_add_tcase (s, tc_phylip); return s; } diff --git a/tests/data/one_recombination.expected.vcf b/tests/data/one_recombination.expected.vcf index 88f504eb..2b69521d 100644 --- a/tests/data/one_recombination.expected.vcf +++ b/tests/data/one_recombination.expected.vcf @@ -1,11 +1,11 @@ ##fileformat=VCFv4.1 ##FORMAT= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sequence_1 sequence_2 sequence_3 sequence_4 sequence_5 sequence_6 sequence_7 sequence_8 sequence_9 sequence_10 -1 1 . A C,-,N . . . AB A C - - - - - - - A -1 6 . A -,C,N . . . AB A - C - - - - - - A -1 8 . A -,C,N . . . AB A - C - - - - - - A -1 9 . A -,C,N . . . AB A - - C - - - - - A -1 11 . A -,C,N . . . AB A - - C - - - - - A +1 1 . A C,- . . . AB A C - - - - - - - A +1 6 . A -,C . . . AB A - C - - - - - - A +1 8 . A -,C . . . AB A - C - - - - - - A +1 9 . A -,C . . . AB A - - C - - - - - A +1 11 . A -,C . . . AB A - - C - - - - - A 1 12 . A -,C,N . . . AB A - - - C - - - - A 1 13 . A C,-,N . . . AB A C - - - - - - - A 1 14 . A -,C,N . . . AB A - - - C - - - - A