forked from hovestadt/methylCtools
-
Notifications
You must be signed in to change notification settings - Fork 1
/
faconv.py
87 lines (64 loc) · 2.48 KB
/
faconv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python
#######################################
# methylCtools faconv
# v1.0.0
# 10 june 2018
#
# volker hovestadt
# developed at the german cancer research center, 2011-2015
# methylctools@hovestadt.bio
#
#
# converts reference sequence C to T (Watson strand) and G to A (Crick strand).
# each fasta entry is read into memory, and written to output file (interleaved).
# to read or write from stdin or stdout, use "-".
def mod_faconv(sysargv):
import sys
import argparse
import datetime
def nicetime(): return datetime.datetime.now().strftime("[faconv %Y-%m-%d %H:%M:%S]")
#######################################
# arguments, filehandles
parser = argparse.ArgumentParser(prog="methylCtools faconv", version="0.9.4", description="converts reference sequence C to T (Watson strand) and G to A (Crick strand)")
parser.add_argument("-s", "--silent", dest="qf", action="store_false", help="do not show status messages")
groupinput = parser.add_argument_group("input files, required")
groupinput.add_argument("inREF", metavar="ref.fa", action="store", default=False, help="reference sequence (fasta format)")
groupoutput = parser.add_argument_group("output files, will be created")
groupoutput.add_argument("outCONV", metavar="ref.conv.fa", action="store", default=False, help="converted reference sequence")
args = parser.parse_args(sysargv)
try:
inFA = open(args.inREF, "r")
outFA = open(args.outCONV, "w")
except IOError as strerror:
sys.exit("methylCtools faconv: error: %s" % strerror)
if args.qf: sys.stderr.write("%s command: %s\n" % (nicetime(), " ".join(sys.argv)))
#######################################
# main
if args.qf: sys.stderr.write("%s start: converting reference genome\n" % nicetime())
cname = False
for line in inFA:
if line[0] == ">":
if cname:
outFA.write(">%s_W\n" % cname)
outFA.write(seqW)
outFA.write(">%s_C\n" % cname)
outFA.write(seqC)
cname = line.rstrip()[1:]
seqW = ""
seqC = ""
if args.qf: sys.stderr.write("%s status: processing %s\n" % (nicetime(), cname))
else:
seqW += line.replace("C", "T").replace("c", "t")
seqC += line.replace("G", "A").replace("g", "a")
outFA.write(">%s_W\n" % cname)
outFA.write(seqW)
outFA.write(">%s_C\n" % cname)
outFA.write(seqC)
#######################################
# end
inFA.close()
outFA.close()
if args.qf: sys.stderr.write("%s end: reference genome converted\n" % nicetime())
if __name__ == "__main__":
import sys
mod_faconv(sys.argv[1:])