-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathhaplogroup.py
42 lines (34 loc) · 1.43 KB
/
haplogroup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# read the output of haplogrep to determine a mt haplogroup
import os
import sys
from pathlib import Path
# iterate over passed files
filenames = sys.argv[1: len(sys.argv)]
for filename in filenames:
with open(filename) as f:
# first line is haplogrep header
header = f.readline()
headerFields = header.split("\t")
sampleIndex = headerFields.index("SampleID")
haplogroupIndex = headerFields.index("Haplogroup")
rankIndex = headerFields.index("Overall_Rank")
notFoundPolysIndex = headerFields.index("Not_Found_Polys")
foundPolysIndex = headerFields.index("Found_Polys")
remainingPolysIndex = headerFields.index("Remaining_Polys")
# second line haplogrep data
line = f.readline()
dataFields = line.split("\t")
key = Path(dataFields[sampleIndex]).name
if key.endswith('.bam'):
key = Path(key).stem # filename without extension
haplogroup = dataFields[haplogroupIndex]
overallRank = float(dataFields[rankIndex])
notFoundPolys = dataFields[notFoundPolysIndex]
foundPolys = dataFields[foundPolysIndex]
remainingPolys = dataFields[remainingPolysIndex]
print(key, end='\t')
print("MT_Haplogroup\t{}".format(haplogroup), end='\t')
print("MT_Haplogroup_rank\t{:.3f}".format(overallRank), end='\t')
print("MT_Haplogroup_NotFoundPolys\t[{}]".format(notFoundPolys), end='\t')
print("MT_Haplogroup_FoundPolys\t[{}]".format(foundPolys), end='\t')
print("MT_Haplogroup_RemainingPolys\t[{}]".format(remainingPolys))