-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathannotate_enzyme_class.py
71 lines (55 loc) · 1.71 KB
/
annotate_enzyme_class.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import pandas as pd
# Directory holding both the workbook to annotate and the enzyme reference table.
working_dir = "/path/to/working_dir/"

# Placeholder recorded for a gene that has no row in the enzyme reference table.
NOT_ANNOTATED = "Not_Annotated"


def annotate_genes(gene_names, lookup):
    """Look up every gene named in one ``Gene_Name`` cell.

    Args:
        gene_names: Cell value holding gene names separated by ", ".
        lookup: Mapping from gene name to an annotation value.

    Returns:
        A list with one entry per gene name, in cell order: the mapped value,
        or ``"Not_Annotated"`` when the gene is not a key of ``lookup``.
        A non-string cell (for example the NaN pandas yields for a missing
        value) has no gene names to split, so it produces an empty list
        instead of raising ``AttributeError``.
    """
    if not isinstance(gene_names, str):
        return []
    return [lookup.get(name, NOT_ANNOTATED) for name in gene_names.split(", ")]


def flag_enzyme(*fields):
    """Return 1 if "Enzyme" occurs in the ``str()`` form of any field, else 0."""
    return int(any("Enzyme" in str(field) for field in fields))


def annotate_enzyme_class(df, enzyme_table):
    """Add enzyme annotation columns to ``df`` in place and return it.

    Args:
        df: DataFrame with a ``Gene_Name`` column (", "-separated gene names)
            and a ``Title`` column.
        enzyme_table: DataFrame with ``Gene``, ``Biological process``,
            ``Molecular function`` and ``Protein class`` columns.

    Returns:
        The same ``df`` object with four columns added (or overwritten):
        ``Biological_Process``, ``Molecular_Function`` and ``Protein_Class``
        each hold a list with one entry per gene of the row, and ``Enzyme``
        is 1 when the text "Enzyme" appears in any of those three lists or
        in ``Title``, otherwise 0.
    """
    genes = enzyme_table["Gene"].tolist()
    # When a gene occurs more than once in the table, dict() keeps the last row.
    biol_dict = dict(zip(genes, enzyme_table["Biological process"].tolist()))
    mol_dict = dict(zip(genes, enzyme_table["Molecular function"].tolist()))
    pclass_dict = dict(zip(genes, enzyme_table["Protein class"].tolist()))

    gene_cells = df["Gene_Name"].tolist()
    df["Biological_Process"] = [annotate_genes(cell, biol_dict) for cell in gene_cells]
    df["Molecular_Function"] = [annotate_genes(cell, mol_dict) for cell in gene_cells]
    df["Protein_Class"] = [annotate_genes(cell, pclass_dict) for cell in gene_cells]

    df["Enzyme"] = [
        flag_enzyme(pclass, mol, biol, title)
        for pclass, mol, biol, title in zip(
            df["Protein_Class"].tolist(),
            df["Molecular_Function"].tolist(),
            df["Biological_Process"].tolist(),
            df["Title"].tolist(),
        )
    ]
    return df


def main():
    """Read the workbook and enzyme table from ``working_dir``, annotate, save."""
    workbook_path = working_dir + "merged_rcsb_calls_ffilled_filtered_distance_annotated.xlsx"
    df = pd.read_excel(workbook_path)
    enzyme_table = pd.read_csv(working_dir + "protein_class_Enzymes.tsv", sep="\t")

    annotate_enzyme_class(df, enzyme_table)

    # NOTE: the output path is the input path, so the source workbook is
    # overwritten with the annotated version.
    df.to_excel(workbook_path, index=False)


if __name__ == "__main__":
    main()