-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathsigma.py
108 lines (94 loc) · 4.14 KB
/
sigma.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from sigma.GraphData import *
from sigma.model import *
import pandas as pd
from pandas import DataFrame
def Model_prediction(ifile, ParameterPath, mfileh5, ofile, Isevaluate=0):
    '''
    * Predict CCS values for the molecules listed in ``ifile`` and write them to a CSV file.
    *
    * Attributes
    * ----------
    * ifile         : Path passed to read_data(); supplies the smiles, adduct and ccs columns
    * ParameterPath : Path of the pickled parameter object (provides All_Atoms, adduct_SET,
    *                 Min_Coor and Max_Coor)
    * mfileh5       : Path passed to load_Model_from_file() to load the model
    * ofile         : Path of the CSV file the predictions are written to
    * Isevaluate    : When equal to 1, Metrics(ccs, predictions) is computed and returned
    *
    * Returns
    * -------
    * The value of Metrics(ccs, predictions) when Isevaluate == 1, otherwise None.
    '''
    smiles, adduct, ccs = read_data(ifile)
    print('## Read data : ',len(smiles))

    # NOTE(review): unpickling can execute arbitrary code; only load parameter
    # files that come from a trusted source.
    with open(ParameterPath, 'rb') as param_file:
        param = pickle.load(param_file)
    print('## All Atoms : ', param.All_Atoms)
    print('## All Adduct : ', param.adduct_SET)

    # Generating_coordinates returns its own smiles/adduct/ccs, so the
    # filtered values replace the ones read from the file.
    smiles, adduct, ccs, Coordinate = Generating_coordinates(smiles, adduct, ccs, param.All_Atoms)
    print('## 3D coordinates generated successfully ')

    # Rescale every coordinate array with the min/max stored in the parameter object.
    coor_min = param.Min_Coor
    coor_span = param.Max_Coor - param.Min_Coor
    for idx, coords in enumerate(Coordinate):
        Coordinate[idx] = (np.array(coords) - coor_min) / coor_span

    adj, features, edge_features = convertToGraph(smiles, Coordinate, param.All_Atoms)
    DataSet = MyDataset(features, adj, edge_features, ccs)
    print('## Graph & Adduct dataset completed')

    ECC_Model = load_Model_from_file(mfileh5)
    print('## Model loading completed')

    predicted = predict(ECC_Model, param.adduct_SET, DataSet, adduct)

    # The 'Ture CCS' header (sic) is the column name written to the CSV; it is
    # kept unchanged so existing consumers of the output file keep working.
    DataFrame({
        'SMILES': smiles,
        'Adduct': adduct,
        'Ture CCS': ccs,
        'Predicted CCS': predicted,
    }).to_csv(ofile, index=False)
    print('## CCS predicted completed')

    if Isevaluate != 1:
        return None
    return Metrics(ccs, predicted)
def Model_train(ifile, ParameterPath, ofile, EPOCHS, BATCHS, Vis, All_Atoms=None, adduct_SET=None):
    '''
    * Train a model on the data in ``ifile`` and save both the model and its data parameters.
    *
    * Attributes
    * ----------
    * ifile         : Path passed to read_data(); supplies the smiles, adduct and ccs columns
    * ParameterPath : Path the pickled Parameter object (adduct set, atom set, max/min
    *                 coordinate) is written to
    * ofile         : Path the trained model is saved to
    * EPOCHS        : Forwarded to train() as EPOCHS
    * BATCHS        : Forwarded to train() as BATCHS
    * Vis           : Forwarded to train() as Vis
    * All_Atoms     : Element set to use; when None or empty it is computed from the
    *                 training smiles with GetSmilesAtomSet()
    * adduct_SET    : Adduct types to use; when None or empty it is the sorted set of
    *                 adducts found in the training data
    *
    * Returns
    * -------
    * The model returned by train(), after it has been saved to ``ofile``.
    '''
    # Read the smiles, adduct and CCS columns from the input file
    smiles, adduct, ccs = read_data(ifile)
    print('## Read data : ',len(smiles))

    # If the user does not supply an element set, default to the set of all
    # elements found in the training set
    if All_Atoms is None or len(All_Atoms) == 0:
        All_Atoms = GetSmilesAtomSet(smiles)

    # 3D conformation of the input SMILES
    smiles, adduct, ccs, Coordinate = Generating_coordinates(smiles, adduct, ccs, All_Atoms)
    print('## 3D coordinates generated successfully ')

    # Min-max normalization: one global max/min over the first three
    # components of every atom entry, then applied to every coordinate array
    flat_coords = [atom[axis] for molecule in Coordinate for atom in molecule for axis in (0, 1, 2)]
    Max_Coor, Min_Coor = np.max(flat_coords), np.min(flat_coords)
    coor_span = Max_Coor - Min_Coor
    for idx, coords in enumerate(Coordinate):
        Coordinate[idx] = (np.array(coords) - Min_Coor) / coor_span

    # Adduct set
    # NOTE(review): a caller-supplied adduct_SET is used in the order given
    # (it is not re-sorted) -- confirm this matches the intended behavior.
    if adduct_SET is None or len(adduct_SET) == 0:
        adduct_SET = sorted(set(adduct))

    print('## All element types : ', All_Atoms)
    print('## All adduct types : ', adduct_SET)

    # Store the parameters. Serialize before opening the file so a pickling
    # failure cannot truncate an existing parameter file, and use a context
    # manager so the handle is always closed.
    rw = parameter.Parameter(adduct_SET, All_Atoms, Max_Coor, Min_Coor)
    payload = pickle.dumps(rw)
    with open(ParameterPath, 'wb') as param_file:
        param_file.write(payload)

    # Construct the graph dataset from the input data
    adj, features, edge_features = convertToGraph(smiles, Coordinate, All_Atoms)
    DataSet = MyDataset(features, adj, edge_features, ccs)
    print('## Build graph data successfully. Dataset:', DataSet)

    # Build the model, train it, then save it
    ECC_Model = Mymodel(DataSet, adduct_SET)
    ECC_Model = train(ECC_Model, DataSet, adduct, adduct_SET, EPOCHS=EPOCHS, BATCHS=BATCHS, Vis=Vis)
    ECC_Model.save(ofile)
    return ECC_Model