# Preprocess.py
import distance
import time
import numpy as np
def distance_matrix():
    """Fill the global ``dist`` matrix with pairwise Levenshtein distances.

    For every pair of indices ``i > j`` in ``range(count + 1)`` the distance
    ``distance.levenshtein(seq[i], seq[j])`` is written to both ``dist[i][j]``
    and ``dist[j][i]``, so the matrix is symmetric.  Diagonal entries are
    never written and keep whatever value ``dist`` was created with.

    Side effects:
        * Every computed distance is also added to the global ``values``
          list, which is sorted in ascending order before returning.
        * The row index ``i`` is printed after each row is completed, as a
          progress indicator.

    Returns:
        The global ``dist`` object (mutated in place).
    """
    # No ``global`` statement is needed: ``dist`` and ``values`` are only
    # mutated here, never rebound.
    for i in range(count + 1):
        # j < i always holds, so no explicit i != j check is required.
        for j in range(i):
            dist[i][j] = distance.levenshtein(seq[i], seq[j])
            dist[j][i] = dist[i][j]
            # ``values`` is sorted below, so the insertion position does not
            # matter; append avoids shifting the list on every pair.
            values.append(dist[i][j])
        print(i)
    values.sort()
    return dist
def preprocess():
    """Collect the sequences found in the global ``lines`` into ``seq``.

    A record begins at a line whose first character is ``'>'`` (a header
    line, FASTA-like layout).  All following lines up to the next header or
    the end of the input are concatenated, without separators, into one
    string.  For each record the global ``count`` is incremented and the
    string is stored as ``seq[count]``.

    Lines that appear before the first header are ignored.  A header that is
    immediately followed by another header, or that is the last line of the
    input, yields an empty sequence.  Blank lines are tolerated and add
    nothing to the sequence.
    """
    global count
    # Chunks of the record currently being read; None until a header is seen.
    chunks = None
    for line in lines:
        # startswith() is safe on empty lines, unlike indexing line[0].
        if line.startswith('>'):
            if chunks is not None:
                count += 1
                seq[count] = "".join(chunks)
            chunks = []
        elif chunks is not None:
            chunks.append(line)
    # Flush the final record, which has no following header to close it.
    if chunks is not None:
        count += 1
        seq[count] = "".join(chunks)
# MAIN
# Reads the sequences from data_amino2.txt, computes their pairwise
# Levenshtein distance matrix and stores it in a .npy file.

# Read the whole input up front; the context manager closes the handle.
with open("data_amino2.txt", "r") as source:
    f = source.read()

# "edited.txt" is opened for writing but nothing is ever written to it.
# The create/truncate side effect is kept as-is; the handle is closed
# right away instead of being leaked.
h = open("edited.txt", "w")
h.close()

# Module-level state shared with preprocess() and distance_matrix().
lines = f.splitlines()
seq = dict()      # sequence index -> concatenated sequence string
values = list()   # every pairwise distance, sorted by distance_matrix()
count = -1        # index of the last stored sequence (-1 means none yet)

# perf_counter() is a monotonic clock intended for measuring intervals.
start = time.perf_counter()
preprocess()
print("Preprocessing done\t" + str(time.perf_counter() - start))

# One row and one column per parsed sequence.
dist = np.zeros(shape=(count + 1, count + 1))

start = time.perf_counter()
a = distance_matrix()
print("Distance Matrix Calculation done\t" + str(time.perf_counter() - start))
np.save('distance_matrix.npy', a)