Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use bases for gaps for internal nodes after fastml #70

Merged
merged 2 commits into from
Mar 15, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions run_gubbins.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,19 +324,25 @@ def reinsert_gaps_into_fasta_file(input_fasta_filename, input_vcf_file, output_f

sample_names = []
gap_position = []
gap_alt_base = []

for vcf_line in vcf_file:
if re.match('^#CHROM', vcf_line) != None :
sample_names = vcf_line.split('\t' )[9:-1]
elif re.match('^\d', vcf_line) != None :
# If the alternate is only a gap it wont have a base in this column
if re.match('^([^\t]+\t){4}([^ACGTacgt])\t', vcf_line) != None:
if re.match('^([^\t]+\t){3}([ACGTacgt])\t([^ACGTacgt])\t', vcf_line) != None:
m = re.match('^([^\t]+\t){3}([ACGTacgt])\t([^ACGTacgt])\t', vcf_line)
gap_position.append(1)
gap_alt_base.append(m.group(2))
elif re.match('^([^\t]+\t){3}([^ACGTacgt])\t([ACGTacgt])\t', vcf_line) != None:
# sometimes the ref can be a gap only
m = re.match('^([^\t]+\t){3}([^ACGTacgt])\t([ACGTacgt])\t', vcf_line)
gap_position.append(1)
gap_alt_base.append(m.group(3))
else:
gap_position.append(0)
gap_alt_base.append('-')

gapped_alignments = []
# interleave gap only and snp bases
Expand All @@ -350,14 +356,14 @@ def reinsert_gaps_into_fasta_file(input_fasta_filename, input_vcf_file, output_f
gap_index = 0
for input_base in record.seq:
while gap_index < len(gap_position) and gap_position[gap_index] == 1:
inserted_gaps.append('-')
inserted_gaps.append(gap_alt_base[gap_index])
gap_index+=1
if gap_index < len(gap_position):
inserted_gaps.append(input_base)
gap_index+=1

while gap_index < len(gap_position):
inserted_gaps.append('-')
inserted_gaps.append(gap_alt_base[gap_index])
gap_index+=1

record.seq = Seq(''.join(inserted_gaps))
Expand Down
6 changes: 6 additions & 0 deletions tests/data/alignment_with_gap_in_parent.aln
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
>child_1
AC--
>child2
ACNA
>parent
----