Skip to content

Commit 3ed4f31

Browse files
author
Kyle Tretina
committed
adding this script which reverses CDS coordinates which have been switched in a GFF
1 parent 09ff0af commit 3ed4f31

File tree

1 file changed

+66
-0
lines changed

1 file changed

+66
-0
lines changed

reverse_misordered_cds_coords.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
This housekeeping script reads a GFF3 file and swaps the start and end coordinates of every CDS feature whose end coordinate is less than its start.
5+
6+
The initial use-case here was a GFF file dumped from WebApollo which had this issue.
7+
8+
INPUT EXAMPLE:
9+
unitig_0|quiver IGS gene 2622416 2625146 . + . Name=598BBB62C7AAA2833D58F914778A9B48;ID=598BBB62C7AAA2833D58F914778A9B48
10+
unitig_0|quiver IGS mRNA 2622416 2625146 . + . Name=unitig_0|quiver:2623455-2623886;Parent=598BBB62C7AAA2833D58F914778A9B48;ID=CC226B225CA7A622FE45C9F664899D9E
11+
unitig_0|quiver IGS exon 2624443 2625146 . + . Name=C2E998B335DEAC334DE78741E6E8D536;Parent=CC226B225CA7A622FE45C9F664899D9E;ID=C2E998B335DEAC334DE78741E6E8D536
12+
unitig_0|quiver IGS exon 2622416 2623082 . + . Name=D45C00C52F9C5556594F632A93270FF6;Parent=CC226B225CA7A622FE45C9F664899D9E;ID=D45C00C52F9C5556594F632A93270FF6
13+
unitig_0|quiver IGS exon 2623100 2624381 . + . Name=5E2F4885E4EF1C9D4B2CA97206797637;Parent=CC226B225CA7A622FE45C9F664899D9E;ID=5E2F4885E4EF1C9D4B2CA97206797637
14+
unitig_0|quiver IGS CDS 2624620 2622890 . + 0 Name=CC226B225CA7A622FE45C9F664899D9E-CDS;Parent=CC226B225CA7A622FE45C9F664899D9E;ID=CC226B225CA7A622FE45C9F664899D9E-CDS;Note=Manually set translation start
15+
16+
Author: Kyle Tretina
17+
"""
18+
19+
import argparse
20+
import os
21+
import biocodegff
22+
23+
def main():
    """Copy a GFF3 file, swapping start/end on CDS rows whose end < start.

    Command-line options (parsed with argparse):
        -i / --input   path to the GFF3 file to read (required)
        -o / --output  path of the GFF3 file to write (required)

    Lines beginning with '#' are copied through verbatim.  Every other line
    is written back followed by a single newline; only rows that have exactly
    nine tab-separated columns, 'CDS' in column 3, and column 5 numerically
    smaller than column 4 are modified (columns 4 and 5 are exchanged).  For
    each corrected row the value returned by biocodegff.column_9_value for
    the 'ID' key is printed to stdout.

    Raises:
        ValueError: if a nine-column CDS row has a non-integer value in
            column 4 or column 5.
    """
    parser = argparse.ArgumentParser( description='Reverses CDS coodinates where stop < start')

    ## input to read and output to write
    parser.add_argument('-i', '--input', type=str, required=True, help='Path to the input GFF3 file' )
    parser.add_argument('-o', '--output', type=str, required=True, help='Output GFF3 file to write' )
    args = parser.parse_args()

    # Context managers guarantee both handles are flushed and closed, even if
    # a malformed coordinate raises part-way through the file.
    with open(args.input) as infile, open(args.output, 'wt') as ofh:
        for line in infile:
            if line.startswith('#'):
                ofh.write(line)
                continue

            # Strip only the line terminator.  A bare rstrip() would also eat
            # trailing tabs, so a row with an empty ninth column would lose a
            # field, be miscounted as 8 columns and be written back altered.
            line = line.rstrip('\r\n')
            cols = line.split("\t")

            if len(cols) == 9 and cols[2] == 'CDS' and int(cols[4]) < int(cols[3]):
                cols[3], cols[4] = cols[4], cols[3]
                feature_id = biocodegff.column_9_value(cols[8], 'ID')
                print("CDS reversed: {0}".format(feature_id))
                line = "\t".join(cols)

            ofh.write("{0}\n".format(line))
56+
57+
58+
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
60+
61+
62+
63+
64+
65+
66+

0 commit comments

Comments
 (0)