-
Notifications
You must be signed in to change notification settings - Fork 0
/
meme_txt_parser.py
executable file
·52 lines (40 loc) · 1.43 KB
/
meme_txt_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import tempfile
import pandas as pd
def parse_motif_sites(f):
    """Yield the "sites sorted by position p-value" tables found in *f*.

    This is a coarse motif sites parser; use with caution - check whether
    MEME parsing is finally supported in Biopython before relying on it.

    A section starts at any line containing the marker text
    ``sites sorted by position p-value``. The two lines after it are
    skipped, the third one is kept as the "pointer" line, and every
    following line is collected until a line made up solely of ``-``
    characters is reached.

    If the input ends inside a section the generator finishes cleanly: a
    section whose pointer line is missing is dropped, and a table lacking
    its closing dashed line is yielded with the rows read so far.

    :param f: open text file, or any other iterable of text lines
    :return: generator of ``(motif_name, rows, pointer_line)`` tuples;
        ``motif_name`` is the last space-separated token in front of the
        marker, ``rows`` is the list of raw table lines (line endings
        kept) and ``pointer_line`` is the raw line directly above the table
    """
    r_start = "sites sorted by position p-value"
    # One shared iterator: the reads below advance the outer loop as well.
    lines = iter(f)
    for line in lines:
        if r_start not in line:
            continue
        # Cut the marker plus the space in front of it off the end, then
        # keep the last token (assumes the stripped line ends with the marker).
        motif_name = line.strip()[:-(len(r_start) + 1)].split(' ')[-1]
        # next(..., None) rather than bare next(): a StopIteration escaping
        # a generator body is converted to RuntimeError (PEP 479).
        for _ in range(2):
            next(lines, None)
        pointer_line = next(lines, None)
        if pointer_line is None:
            # Input ended before the table header was complete.
            return
        tab = []
        for row in lines:
            # A row consisting only of dashes closes the table.
            if set(row.strip()) == {'-'}:
                break
            tab.append(row)
        yield motif_name, tab, pointer_line
def char_all(l, char):
    """
    Tell whether *l* is non-empty and made up of a single repeated item
    that equals *char*.

    :param l: str, the text to inspect
    :param char: the only item allowed to occur in *l*
    :return: bool, False for empty input or when any other item is present
    """
    distinct = set(l)
    # Zero or several distinct items can never match a single char.
    if len(distinct) != 1:
        return False
    (only,) = distinct
    return only == char
def read_file(file, no_strand=False):
    """Load every motif-site table of *file* into one pandas DataFrame.

    The rows produced by ``parse_motif_sites`` are prefixed with their motif
    name, written to a temporary file and parsed by pandas as
    whitespace-separated columns.

    :param file: path of the text file to read
    :param no_strand: when True the ``strand`` column is left out of the
        column names (for input whose rows carry no strand field)
    :return: DataFrame with the columns ``motif_id``, ``sequence_name``,
        ``strand`` (unless *no_strand*), ``start``, ``P-value``,
        ``_site_left``, ``site`` and ``_site_right``

    NOTE(review): when no site rows are found the temporary file is empty
    and pandas is expected to raise ``EmptyDataError`` - confirm if callers
    rely on that.
    """
    columns = ['motif_id', 'sequence_name', 'strand', 'start', 'P-value',
               '_site_left', 'site', '_site_right']
    if no_strand:
        columns.remove('strand')
    with tempfile.TemporaryFile(mode="r+") as t, open(file, 'r') as f:
        for motif_name, table, _pointer in parse_motif_sites(f):
            # Prefix each row with its motif so all tables share one layout.
            t.writelines("{} {}".format(motif_name, line) for line in table)
        # Rewind so pandas reads what was just written.
        t.seek(0)
        # sep=r"\s+" is the documented equivalent of delim_whitespace=True,
        # which is deprecated since pandas 2.2.
        return pd.read_table(t, names=columns, header=None, sep=r"\s+")