-
Notifications
You must be signed in to change notification settings - Fork 12
/
editdistancealleles.py
32 lines (25 loc) · 1.14 KB
/
editdistancealleles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import editdistance
import sys
from subprocess import Popen, PIPE
HLANUC="hla_nuc.fasta"
if len(sys.argv) != 3:
print("Usage: python editdistancealleles.py A*01:01:01 A*03:02:01\nNote: if you do not include the maximum level of specificity (e.g. four fields) an ARBITRARY allele will be chosen matching the prefix given.")
sys.exit()
a1 = sys.argv[1]
a2 = sys.argv[2]
a1p = Popen(["grep", "-F", "-m", "1", a1, HLANUC],stdout=PIPE,stderr=PIPE)
(a1_stdout, _) = a1p.communicate()
a1_name = a1_stdout.split()[0][1:]
a1_allele = a1_stdout.split()[1]
a1p = Popen(["samtools", "faidx", HLANUC, a1_name],stdout=PIPE,stderr=PIPE)
(a1_stdout, _) = a1p.communicate()
a1_seq = "".join([s.strip() for s in a1_stdout.split()[1:]])
a2p = Popen(["grep", "-F", "-m", "1", a2, HLANUC],stdout=PIPE,stderr=PIPE)
(a2_stdout, _) = a2p.communicate()
a2_name = a2_stdout.split()[0][1:]
a2_allele = a2_stdout.split()[1]
a2p = Popen(["samtools", "faidx", HLANUC, a2_name],stdout=PIPE,stderr=PIPE)
(a2_stdout, _) = a2p.communicate()
a2_seq = "".join([s.strip() for s in a2_stdout.split()[1:]])
d = editdistance.eval(a1_seq,a2_seq)
print("CDS edit distance:", a1_allele, a2_allele, d)