-
Notifications
You must be signed in to change notification settings - Fork 0
/
seqCnt.py
57 lines (43 loc) · 1.61 KB
/
seqCnt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import sys, numpy, argparse
import fasta, fastq
def fastaHandler(files):
    """Yield the length of every sequence found in the given FASTA files.

    Each entry of ``files`` is handed to ``fasta.fastaReader``; the reader is
    iterated as (header, sequence) pairs and only the sequence length is
    produced. Files are processed lazily, one after another, in the order
    given.
    """
    for path in files:
        # The header is unpacked only to enforce the two-field record shape.
        yield from (len(sequence) for _header, sequence in fasta.fastaReader(path))
def fastqHandler(files):
    """Yield the length of every sequence found in the given FASTQ files.

    Each entry of ``files`` is handed to ``fastq.fastqReader``; the reader is
    iterated as (header, sequence, second header, quality) records and only
    the sequence length is produced. Files are processed lazily, one after
    another, in the order given.
    """
    for path in files:
        # All four fields are unpacked to enforce the record shape; only the
        # sequence is used.
        yield from (
            len(sequence)
            for _header, sequence, _plus_header, _quality in fastq.fastqReader(path)
        )
def summarizeLengths(label, lens):
    """Format one tab-separated summary row for a list of sequence lengths.

    Args:
        label: Text for the first column (a file name, or "Combined").
        lens: List of integer sequence lengths; may be empty.

    Returns:
        A string with seven tab-separated columns: label, number of
        sequences, total bases, max, min, mean and median length. Every
        numeric column is rendered with ``%d``, so mean and median are
        truncated to integers. An empty ``lens`` produces zeros in all
        numeric columns.
    """
    row_format = "%s\t%d\t%d\t%d\t%d\t%d\t%d"
    if not lens:
        # numpy.max / numpy.min raise ValueError on an empty sequence, which
        # would abort the whole report for a single empty input.
        return row_format % (label, 0, 0, 0, 0, 0, 0)
    return row_format % (
        label,
        len(lens),
        sum(lens),
        numpy.max(lens),
        numpy.min(lens),
        numpy.mean(lens),
        numpy.median(lens),
    )


def main(argv=None):
    """Parse the command line and print length statistics for the input files.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Raises:
        Exception: If ``--type`` is neither "a" (fasta) nor "q" (fastq).
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("-c", "--combine", dest="combine", action="store_true", default=False, help="combine counts")
    parser.add_argument("-t", "--type", dest="type", default="q", help="input file type (a=fasta, q=fastq)")
    # NOTE(review): REMAINDER collects everything after the first positional,
    # option-like strings included, so options must precede the file names.
    parser.add_argument("files", nargs=argparse.REMAINDER)
    options = parser.parse_args(argv)

    # NOTE(review): this echo of the parsed options lands on stdout ahead of
    # the table; kept as-is because consumers may already account for it.
    print(options)
    print("\tseqs\tbases\tmax\tmin\tmean\tmedian")

    handlers = {"a": fastaHandler, "q": fastqHandler}
    if options.type not in handlers:
        raise Exception("Unknown type")
    handler = handlers[options.type]

    if options.combine:
        # One row covering the sequences of all files together.
        print(summarizeLengths("Combined", list(handler(options.files))))
    else:
        # One row per input file, in command-line order.
        for path in options.files:
            print(summarizeLengths(path, list(handler([path]))))


if __name__ == '__main__':
    main()