-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetmatrix.py
32 lines (27 loc) · 907 Bytes
/
getmatrix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import sys, os, re, platform
import pandas as pd
import numpy as np
# Convert every sequence of a FASTA file into an AAindex property matrix and
# write one CSV per sequence.
#
# The defaults below are the locations the script has always used; they are
# only read when the file is executed directly.
FASTA_PATH = '/Users/shixinjin/Desktop/CNN/Lys/Supp-4-negative.fasta'
AAINDEX_PATH = '/Users/shixinjin/Desktop/AAidx.csv'
OUTPUT_DIR = '/Users/shixinjin/Desktop/CNN/Lys/negative'


def _read_fasta(path):
    """Parse the FASTA file at *path* into a list of ``(name, sequence)``.

    The name is the first whitespace-delimited token of the header line;
    the sequence is the concatenation of all following lines of the record.

    Raises:
        ValueError: if a record has an empty header line.
    """
    with open(path) as handle:
        text = handle.read()

    entries = []
    # Anything before the first '>' is not a record, hence the [1:].
    for chunk in text.split('>')[1:]:
        lines = chunk.splitlines()
        header = lines[0].split() if lines else []
        if not header:
            raise ValueError('FASTA record with an empty header line in %r' % path)
        # Strip each line so stray trailing whitespace is not mistaken
        # for a residue.
        sequence = ''.join(line.strip() for line in lines[1:])
        entries.append((header[0], sequence))
    return entries


def _encode_sequence(name, sequence, aaindex):
    """Return the slice of *aaindex* holding one column per residue.

    The result keeps the row index of *aaindex* and has ``len(sequence)``
    columns, labelled with the residue letters in sequence order.

    Raises:
        KeyError: if *sequence* contains a character that is not a column
            of *aaindex*.
    """
    unknown = sorted(set(sequence) - set(aaindex.columns))
    if unknown:
        raise KeyError(
            'sequence %r contains residues missing from the AAindex table: %s'
            % (name, ', '.join(unknown))
        )
    # Selecting with a list of labels repeats a column for every occurrence
    # of the residue, which is exactly the per-position matrix we want.
    return aaindex[list(sequence)]


def main(fasta_path=FASTA_PATH, aaindex_path=AAINDEX_PATH, output_dir=OUTPUT_DIR):
    """Encode every FASTA record and write ``<output_dir>/<name>.csv``.

    Args:
        fasta_path: FASTA file to read.
        aaindex_path: CSV table read with its first column as the row index
            and its first row as the column labels (one column per residue).
        output_dir: existing directory that receives one CSV per record.

    Returns:
        dict mapping record name to the DataFrame written for it. When two
        records share a name the later one wins, both in the dict and on
        disk.
    """
    aaindex = pd.read_csv(aaindex_path, index_col=0, header=0)

    # Matrices are collected in a plain dict rather than injected into the
    # module namespace, so a record called e.g. "result" or "pd" can no
    # longer overwrite the script's own names.
    matrices = {}
    for name, sequence in _read_fasta(fasta_path):
        matrix = _encode_sequence(name, sequence, aaindex)
        matrix.to_csv(os.path.join(output_dir, '%s.csv' % name))
        matrices[name] = matrix
    return matrices


if __name__ == '__main__':
    main()