-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathangsd_results.py
46 lines (40 loc) · 1.54 KB
/
angsd_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# Process the results of angsd and print them in the keyed statistics format.
# We use the "new method 1" results, i.e. the output line that looks like:
"Method1: new_llh Version: MoM:0.006946 SE(MoM):2.138220e-03 ML:0.006724 SE(ML):7.099542e-15"
import sys
import re
def parse_angsd_results(filename):
    """Parse an angsd output file into a flat dict of statistics.

    Two kinds of lines are recognised:

    * a line matching ``We have nSNP sites: <int>,`` (optionally preceded
      by whitespace); the count is stored under ``"nsites"``;
    * a line starting with ``Method1: new_llh Version: ``; each of its
      whitespace-separated ``key:value`` tokens is stored as a float under
      ``key`` (e.g. ``MoM``, ``SE(MoM)``, ``ML``, ``SE(ML)``).

    When several matching lines occur, later values overwrite earlier ones.

    Args:
        filename: Path of the angsd output file to read.

    Returns:
        A dict with ``"nsites"`` (int, 0 when no site-count line was found)
        plus one float entry per key parsed from the statistics line.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token on the statistics line is not exactly one
            ``key:value`` pair with a float-convertible value.
    """
    # Raw string: '\s' and '\d' are invalid escapes in a normal literal and
    # trigger a SyntaxWarning on recent Python versions.
    nsnp_sites_pattern = re.compile(r'\s*We have nSNP sites:\s+(\d+),')
    stats_search = "Method1: new_llh Version: "
    error_value = 'contamination'

    angsd = {
        "nsites": 0,
    }
    with open(filename) as f:
        for line in f:
            site_result = nsnp_sites_pattern.match(line)
            if site_result:
                angsd["nsites"] = int(site_result.group(1))
            if line.startswith(stats_search):
                # Errors start with 'contamination' on the same line as the
                # good data; keep only what precedes it.  partition() returns
                # the whole line unchanged when the marker is absent.
                usable = line.partition(error_value)[0]
                for pair in usable[len(stats_search):].split():
                    key, value = pair.split(':')
                    angsd[key] = float(value)
    return angsd
if __name__ == '__main__':
    # Usage: angsd_results.py <angsd output file>
    angsd_filename = sys.argv[1]
    results = {}

    # One row per reported statistic: (output template, result key, value
    # printed when the key is missing from the parsed results).
    report_rows = (
        ("angsd_{}\t{:d}", "nsites", 0),
        ("angsd_{}\t{:.3g}", "MoM", -1.0),
        ("angsd_{}\t{:.2g}", "SE(MoM)", -1.0),
        ("angsd_{}\t{:.3g}", "ML", -1.0),
        ("angsd_{}\t{:.2g}", "SE(ML)", -1.0),
    )

    try:
        results = parse_angsd_results(angsd_filename)
    finally:
        # Always emit a full tab-separated record, even when parsing raised:
        # in that case `results` is still empty and the fallbacks are printed
        # before the exception propagates.
        for template, stat_name, fallback in report_rows:
            print(template.format(stat_name, results.get(stat_name, fallback)), end='\t')
        print('')