-
Notifications
You must be signed in to change notification settings - Fork 1
/
fasta-subset.py
executable file
·46 lines (37 loc) · 1.29 KB
/
fasta-subset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/usr/bin/env python
# Prints the records of a FASTA file whose sequence names are listed in a sample-names file.
import sys
import os.path
import argparse
try:
    from sets import Set  # Python 2 only; the module does not exist on Python 3
except ImportError:
    Set = set
# ----- command line parsing -----
def parse_args(argv=None):
    """Parse the command line of the FASTA subsetting script.

    Args:
        argv: List of argument strings to parse. ``None`` makes argparse
            read ``sys.argv[1:]``.

    Returns:
        The argparse namespace with the attributes ``fasta_file``,
        ``sample_names``, ``prefix`` and ``prefix_delimiter``.
    """
    parser = argparse.ArgumentParser(
        description="Prints fasta containing only the given set of sequences.")
    parser.add_argument("fasta_file", type=str, help="FASTA file.")
    parser.add_argument("sample_names", type=str, help="Sample names.")
    parser.add_argument(
        "-p", "--prefix", dest="prefix", action="store_true",
        help="Names given are a prefix of the name in the fasta.")
    parser.add_argument(
        "-d", "--prefix_delimiter",
        help="Delimiter for splitting the prefix.")
    parser.set_defaults(prefix=False)
    return parser.parse_args(argv)
# ----- end command line parsing -----


def read_sample_names(lines):
    """Collect the wanted sequence names, one name per input line.

    Args:
        lines: Iterable of strings (for example an open text file).

    Returns:
        A set with the name found on each non-blank line.
    """
    names = set()
    for line in lines:
        # Strip only the line terminator: slicing off the last character
        # would truncate a final line that has no trailing newline.
        name = line.rstrip("\r\n")
        # A blank line cannot name a sequence; keeping "" in the set would
        # make every header with an empty name or prefix match.
        if name:
            names.add(name)
    return names


def header_name(header_line, prefix=False, prefix_delimiter=None):
    """Return the name that a FASTA header line is matched by.

    Args:
        header_line: A line starting with ``>``.
        prefix: When true, only the part of the header before the first
            delimiter is used as the name.
        prefix_delimiter: Delimiter passed to ``str.split``; ``None`` splits
            on runs of whitespace.

    Returns:
        The header text after ``>`` without its line terminator, or its
        first delimited field when ``prefix`` is true.
    """
    text = header_line[1:].rstrip("\r\n")
    if not prefix:
        return text
    fields = text.split(prefix_delimiter)
    # Whitespace splitting of a blank header yields no fields at all.
    return fields[0] if fields else ""


def subset_fasta(fasta_lines, sample_names, prefix=False,
                 prefix_delimiter=None):
    """Yield the lines of the FASTA records whose name is wanted.

    Args:
        fasta_lines: Iterable of FASTA lines (for example an open file).
        sample_names: Container of wanted names, tested with ``in``.
        prefix: Match on the header prefix instead of the whole header.
        prefix_delimiter: Delimiter used to cut the prefix.

    Yields:
        Each header line whose name is in ``sample_names`` followed by the
        lines up to the next header, unchanged.
    """
    printing = False
    for line in fasta_lines:
        if line.startswith(">"):
            # A header decides whether this whole record is emitted.
            name = header_name(line, prefix, prefix_delimiter)
            printing = name in sample_names
        if printing:
            yield line


def main(argv=None):
    """Run the script: write the selected FASTA records to standard output.

    Args:
        argv: Argument list forwarded to ``parse_args``; ``None`` uses the
            process command line.
    """
    args = parse_args(argv)
    # Context managers close both inputs even if filtering fails midway.
    with open(args.fasta_file) as fasta_file:
        with open(args.sample_names) as sample_names_file:
            sample_names = read_sample_names(sample_names_file)
        selected = subset_fasta(fasta_file, sample_names, args.prefix,
                                args.prefix_delimiter)
        for line in selected:
            sys.stdout.write(line)


if __name__ == "__main__":
    main()