forked from horsedayday/DualGCN
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dualgcn_infer_improve.py
167 lines (137 loc) · 6.24 KB
/
dualgcn_infer_improve.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
"""
author: @cesarasa
Code for inferencing with the trained DualGCN model.
Required outputs
----------------
All the outputs from this train script are saved in params["model_outdir"].
1. Trained model.
The model is trained with train data and validated with val data. The model
file name and file format are specified, respectively by
params["model_file_name"] and params["model_file_format"].
For DualGCN, the saved model:
model.h5
2. Predictions on val data.
Raw model predictions calculated using the trained model on val data. The
predictions are saved in val_y_data_predicted.csv
3. Prediction performance scores on val data.
The performance scores are calculated using the raw model predictions and
the true values for performance metrics specified in the metrics_list. The
scores are saved as json in val_scores.json
This script is based on the GraphDRP code made by Dr. Partin.
"""
import numpy as np
import os
import sys
from pathlib import Path
from keras.models import load_model
# [Req] Model Specific imports
from dualgcn_train_improve import MetadataFramework, ModelEvaluate, train_params, metrics_list
from dualgcn_preprocess_improve import preprocess_parameters
from model_utils.feature_extraction import CelllineGraphAdjNorm, FeatureExtract
from code.layers.graph import GraphConv
# [Req] IMPROVE/CANDLE imports
import improve.framework as frm
from improve.metrics import compute_metrics
from improve import drug_resp_pred as drp
# [Req] Directory that contains this script; main() hands it to
# frm.initialize_parameters().
filepath = Path(__file__).resolve().parent

# ---------------------
# [Req] Parameter lists
# ---------------------
# Two parameter lists are required: app_infer_params and model_infer_params.
# The values for the parameters in both lists should be specified in the
# parameter file that is passed as the default_model arg of
# frm.initialize_parameters().

# 1. App-specific params (App: monotherapy drug response prediction).
#    No app-specific params are defined in this script.
app_infer_params = []

# 2. Model-specific params (Model: DualGCN).
#    Every entry here is optional; the list stays empty when the model
#    requires no extra params.
model_infer_params = []

# [Req] Combined list; main() merges it into the parameter definitions given
# to frm.initialize_parameters().
infer_params = [*app_infer_params, *model_infer_params]
# ---------------------
# model_path = os.path.join(params["model_outdir"], f'{params["model_file_name"]}{params["model_file_format"]}')
# print(model_path)
# [Req]
def run(params):
    """Run model inference and score the predictions.

    Builds the model inputs, loads the saved model, evaluates it with
    ``ModelEvaluate`` and then stores both the raw predictions and the
    performance scores under ``params["infer_outdir"]``.

    Args:
        params (dict): dict of CANDLE/IMPROVE parameters and parsed values.
            Keys read directly in this function: "batch_size",
            "infer_outdir" and "model_dir". The helpers called below receive
            the whole dict.

    Returns:
        The value produced by ``frm.compute_performace_scores`` for
        stage 'test' (prediction performance scores computed according to
        the metrics_list).
    """
    batch_size = params["batch_size"]
    print(params)

    # ------------------------------------------------------
    # [Req] Create output dir
    # ------------------------------------------------------
    frm.create_outdir(outdir=params["infer_outdir"])

    # ------------------------------------------------------
    # [Req] Create data names for test set
    # ------------------------------------------------------
    # NOTE(review): the name is not used further down in this function.
    test_data_fname = frm.build_ml_data_name(params, stage="test")

    (data_train_idx, data_test_idx, data_val_idx,
     drug_feature, ppi_adj_info, common_genes) = MetadataFramework(params)
    ppi_adj = CelllineGraphAdjNorm(ppi_adj_info, common_genes, params)

    # Data Extraction
    # NOTE(review): features are extracted for data_train_idx while
    # ModelEvaluate below is given data_test_idx and the results are stored
    # with stage='test' -- confirm this is the intended split for inference.
    X_drug_feat, X_drug_adj, X_cellline_feat, Y = FeatureExtract(
        data_train_idx,
        drug_feature,
        params,
        israndom=False,
    )

    # Data Normalization + Type Conversion
    # Cell-line features are standardized along axis 0 before the cast.
    X_cellline_feat_mean = np.mean(X_cellline_feat, axis=0)
    X_cellline_feat_std = np.std(X_cellline_feat, axis=0)
    X_cellline_feat = (X_cellline_feat - X_cellline_feat_mean) / X_cellline_feat_std
    X_cellline_feat = X_cellline_feat.astype('float16')
    X_drug_feat = X_drug_feat.astype('float16')
    X_drug_adj = X_drug_adj.astype('float16')
    ppi_adj = ppi_adj.astype('float16')

    # Data for inference: ppi_adj is repeated once per row of X_drug_feat so
    # every model input has the same leading dimension.
    X = [
        X_drug_feat,
        X_drug_adj,
        X_cellline_feat,
        np.tile(ppi_adj, (X_drug_feat.shape[0], 1, 1)),
    ]

    # Loading the Model
    # Load the best saved model (as determined based on val data)
    model_path = frm.build_model_path(params, model_dir=params["model_dir"])  # [Req]
    # GraphConv is a custom layer, so it must be registered for deserialization.
    model = load_model(model_path, custom_objects={'GraphConv': GraphConv})

    # Only Y_pred is used afterwards; the individual metrics are recomputed by
    # frm.compute_performace_scores below.
    pcc, rmse, spearman, rsquared, Y_pred = ModelEvaluate(
        model=model,
        X_val=X,
        Y_val=Y,
        data_test_idx_current=data_test_idx,
        eval_batch_size=batch_size,
    )

    # ------------------------------------------------------
    # [Req] Save raw predictions in dataframe
    # ------------------------------------------------------
    frm.store_predictions_df(
        params,
        y_true=Y,
        y_pred=Y_pred,
        stage='test',
        outdir=params["infer_outdir"],
    )

    # ------------------------------------------------------
    # [Req] Compute performance scores
    # ------------------------------------------------------
    test_scores = frm.compute_performace_scores(
        params,
        y_true=Y,
        y_pred=Y_pred,
        stage='test',
        outdir=params["infer_outdir"],
        metrics=metrics_list,
    )

    # Hand the scores back to the caller; previously the function fell off the
    # end and returned None despite documenting a return value.
    return test_scores
# [Req]
def main(args):
    """Parse the CANDLE/IMPROVE parameters and launch model inference.

    Args:
        args: command-line arguments supplied by the caller; they are not
            read inside this function.

    Returns:
        None
    """
    # [Req] Merge the preprocess, train and infer parameter definitions.
    extra_definitions = preprocess_parameters + train_params + infer_params
    parsed_params = frm.initialize_parameters(
        filepath,
        default_model="params_cs.txt",
        additional_definitions=extra_definitions,
        required=None,
    )
    run(parsed_params)
    print("\nFinished model inference.")
    return None
# [Req]
# Run inference only when executed as a script, forwarding the command-line
# arguments (without the program name) to main().
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)