-
Notifications
You must be signed in to change notification settings - Fork 26
/
bam_read_length.py
executable file
·44 lines (34 loc) · 1.03 KB
/
bam_read_length.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/env python
import pysam
import sys
docstring="""DESCRIPTION
Produce a histogram of read length from bam file
USAGE
bam_read_length.py <sam|bam> <max-lines>
sam|bam: Input sam or bam file
max-reads: Stop after reading this many reads having
read length not None (default to read all the reads)
OUTPUT
Tab separated file with: input file name, read length, read count.
"""
if len(sys.argv) not in (2, 3) or sys.argv[1] in ('-h', '--help'):
sys.exit(docstring)
if len(sys.argv) == 3:
max_reads= int(sys.argv[2])
else:
max_reads= None
read_count= 0
read_len_hist= {}
samfile= pysam.Samfile(sys.argv[1], 'rb')
for line in samfile:
rl= line.alen
if rl is not None:
read_count += 1
read_len_hist[rl]= read_len_hist.get(rl, 0) + 1
if max_reads is not None and read_count >= max_reads:
break
lengths= sorted([k for k in read_len_hist])
for i in lengths:
print('\t'.join([sys.argv[1], str(i), str(read_len_hist[i])]))
samfile.close()
sys.exit()