-
Notifications
You must be signed in to change notification settings - Fork 1
/
split-on-ns.py
executable file
·31 lines (24 loc) · 806 Bytes
/
split-on-ns.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/usr/bin/env python
import sys
import argparse
def read_fasta(lines):
    """Parse FASTA-formatted lines into a list of ``(name, sequence)`` records.

    A line starting with '>' begins a new record; its name is the first
    whitespace-delimited token after the '>'.  Every following line, up to
    the next header, is stripped of surrounding whitespace and appended to
    that record's sequence.  Blank lines are ignored.

    Records are returned in file order.  Each header yields its own record,
    so a repeated name keeps its own sequence instead of overwriting the
    earlier one.

    Args:
        lines: Any iterable of text lines (an open file, a list of strings).

    Returns:
        A list of ``(name, sequence)`` tuples.

    Raises:
        ValueError: If a header carries no name, or if sequence data appears
            before the first header.
    """
    records = []
    # Pieces of the record currently being read; None until a header is seen.
    parts = None
    for lineno, line in enumerate(lines, 1):
        if line.startswith('>'):
            fields = line[1:].split()
            if not fields:
                raise ValueError(
                    "line {:d}: header has no sequence name".format(lineno))
            parts = []
            records.append((fields[0], parts))
            continue
        text = line.strip()
        if not text:
            continue
        if parts is None:
            raise ValueError(
                "line {:d}: sequence data before the first '>' header".format(lineno))
        parts.append(text)
    # Join once per record rather than growing a string with += per line.
    return [(name, "".join(pieces)) for name, pieces in records]


def split_on_ns(sequence):
    """Return the upper-cased runs of *sequence* that lie between Ns.

    The sequence is upper-cased first, so both 'n' and 'N' act as
    separators.  Consecutive Ns count as a single gap, and leading or
    trailing Ns produce no empty segments.  A sequence that is empty or
    consists only of Ns yields an empty list.
    """
    # Turning every N into a space lets str.split() collapse runs of Ns and
    # drop empty pieces in a single pass.
    return sequence.upper().replace('N', ' ').split()


def main():
    """Read the FASTA file named on the command line and print its contigs
    split at Ns.

    Each segment is written to stdout as its own record named
    ``<contig>-<k>``, where k counts that contig's segments from 1.
    """
    # ----- command line parsing -----
    parser = argparse.ArgumentParser(description="Splits contigs in fasta by removing Ns.")
    parser.add_argument("file", type=str, help="Fasta file.")
    args = parser.parse_args()
    # ----- end command line parsing -----

    # The whole file is parsed before anything is written, so malformed
    # input is reported without emitting partial output.
    with open(args.file) as fasta:
        try:
            records = read_fasta(fasta)
        except ValueError as err:
            parser.error("{:s}: {:s}".format(args.file, str(err)))

    for name, sequence in records:
        for n, segment in enumerate(split_on_ns(sequence), 1):
            sys.stdout.write(">{:s}-{:d}\n".format(name, n))
            sys.stdout.write("{:s}\n".format(segment))


if __name__ == "__main__":
    main()