-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathhagfish_gapfinder
executable file
·75 lines (54 loc) · 1.76 KB
/
hagfish_gapfinder
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python
import os
import sys
import math
import pickle
import jinja2
import numpy as np
import logging
import optparse
from hagfish_file_util import *
## Arguments: General options
# Command-line interface: -f names the reference FASTA file, and each -v
# raises the logging verbosity by one step.
parser = optparse.OptionParser()
parser.add_option('-f', dest='fasta',
                  help='reference fasta file')
# default=0 matters: with action="count" and no explicit default, optparse
# leaves the value as None when -v is never given, and comparing None with
# an int (as the verbosity checks later in this script do) raises TypeError
# on Python 3.
parser.add_option('-v', dest='verbose', action="count", default=0,
                  help='Show debug information')
options, args = parser.parse_args()
# Configure the 'hagfish' logger: records go to a StreamHandler and every
# line is prefixed with a coloured "HAGFISH" tag.
l = logging.getLogger('hagfish')
handler = logging.StreamHandler()
# ESC[0;37;44m selects white-on-blue; ESC[0m resets the terminal attributes.
logmark = chr(27) + '[0;37;44mHAGFISH' + \
    chr(27) + '[0m '
formatter = logging.Formatter(
    logmark + '%(levelname)-6s %(message)s')
handler.setFormatter(formatter)
l.addHandler(handler)

# optparse leaves a "count" option at None when the flag is never passed;
# normalise to 0 so the comparisons below are also valid on Python 3, where
# None >= 2 raises TypeError.
verbosity = options.verbose or 0
if verbosity >= 2:
    l.setLevel(logging.DEBUG)
elif verbosity == 1:
    l.setLevel(logging.INFO)
else:
    l.setLevel(logging.WARNING)

# The reference FASTA is mandatory: log the problem and exit with a non-zero
# status when -f was not supplied.
if not options.fasta:
    l.critical("Must specify a fasta input file")
    sys.exit(-1)
if __name__ == '__main__':
    # Per-sequence gap masks are written below ./gaps (relative to the cwd).
    if not os.path.exists('gaps'):
        os.makedirs('gaps')

    # Read an arbitrary seqInfo file: the first directory entry whose name
    # contains '.seqinfo' is used.  A missing seqInfo directory is treated
    # like an empty one so the user gets the error message below instead of
    # an OSError traceback from os.listdir.
    candidates = os.listdir('seqInfo') if os.path.isdir('seqInfo') else []
    for fname in candidates:
        if '.seqinfo' in fname:
            seqInfoFile = os.path.join('seqInfo', fname)
            break
    else:
        # Loop finished without a break: no matching file was found.
        l.critical("cannot find a seqInfo file")
        sys.exit(-1)

    l.info("reading %s for seqinfo", seqInfoFile)
    # Pickle data is binary, so the file has to be opened in 'rb' mode;
    # a text-mode handle makes pickle.load fail on Python 3.
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only point this at seqInfo files produced by a trusted run.
    with open(seqInfoFile, 'rb') as F:
        seqInfo = pickle.load(F)
    l.info("discovered %d sequences", len(seqInfo))

    for seqId, seq in fastareader(options.fasta):
        l.info('Processing %s (%d nt)', seqId, len(seq))
        # Mask with 1 where the base equals lowercase 'n' and 0 elsewhere,
        # stored with numpy type code 'b' (signed byte).
        # NOTE(review): uppercase 'N' is not flagged -- presumably
        # fastareader yields lower-cased sequence; confirm.
        nns = np.array(np.array(list(seq)) == 'n', 'b')
        # One archive per sequence, keyed 'nns', under the gaps directory.
        np_savez(os.path.join('gaps', seqId), nns=nns)