-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcoverage.py
32 lines (26 loc) · 963 Bytes
/
coverage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# 1. Read the results of target coverage length
# 2. Normalize for reference length to produce customary coverage statistic
# Output is a series of lines with tab-separated fields: sample_id, coverage
# The input file is in the aDNA counting statistics format
import sys
filename = sys.argv[1]
reference_length = int(sys.argv[2])
sample_id_arg = sys.argv[3]
field = sys.argv[4]
with open(filename) as f:
# skip the first line
f.readline()
for line in f:
#key label1 value1 label2 value2 ...
tokens = str.strip(line).split('\t')
sampleID = tokens[0]
labels = tokens[1:len(tokens):2]
values = tokens[2:len(tokens):2]
if sampleID == sample_id_arg:
for n in range(0, len(labels)):
if field == labels[n]:
coverage = float(values[n]) / reference_length
print("{}\t{:f}".format(sample_id_arg, coverage))
sys.exit(0)
# if the value does not appear, there is no coverage so print 0
print("{}\t{:f}".format(sample_id_arg, 0.0))