-
Notifications
You must be signed in to change notification settings - Fork 26
/
fastq_read_length.py
executable file
·51 lines (39 loc) · 1.08 KB
/
fastq_read_length.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python
import sys
import gzip
docstring="""
DESCRIPTION
Produce histrogram of read length from input fastq file (gzipped ok)
USAGE
fastq_read_length.py <fastq> <max-reads>
fastq: Input fastq file
max-reads: Stop after reading this many reads (default to read all the reads)
OUTPUT
Tab separated file with: input file name, read length, read count.
"""
if len(sys.argv) not in (2, 3) or sys.argv[1] in ('-h', '--help'):
sys.exit(docstring)
if sys.argv[1].endswith('.gz'):
fin= gzip.open(sys.argv[1])
else:
fin= open(sys.argv[1])
if len(sys.argv) == 3:
max_reads= int(sys.argv[2])
else:
max_reads= None
n= -1
read_count= 0
read_len_hist= {}
for line in fin:
if n % 4 == 0:
read_count += 1
rl= len(line.rstrip())
read_len_hist[rl]= read_len_hist.get(rl, 0) + 1
if max_reads is not None and read_count >= max_reads:
break
n += 1
lengths= sorted([k for k in read_len_hist])
for i in lengths:
print('\t'.join([sys.argv[1], str(i), str(read_len_hist[i])]))
fin.close()
sys.exit()