-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSIFT_GO_pred.py
55 lines (44 loc) · 1.53 KB
/
SIFT_GO_pred.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import numpy as np
import pandas as pd
import os
import sys
import pickle
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV
# Position in the chunk list that this run processes, taken from the first
# command-line argument.
if len(sys.argv) < 2:
    # Exit with a readable usage message rather than a bare IndexError.
    sys.exit('usage: %s <GO_idx>' % sys.argv[0])
GO_idx = int(sys.argv[1])

# Label matrix: column i is used later as the target vector for term i.
GO_term_array = np.load('sift_data/mat_Y.npy')
# Feature matrix; the same matrix is used as X for every fitted classifier.
features = np.load('sift_data/500_mat_X.npy')

# GO_terms[i] is stored next to the score obtained for label column i.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only point this at trusted, locally generated data.
filename = 'sift_data/list15to500GO_GAB.pkl'
with open(filename, 'rb') as handle:
    GO_terms = pickle.load(handle)
# One-step pipeline wrapping an L1-penalised linear SVM with balanced class
# weights and a fixed random state.
pipe_LinearSVC = Pipeline(steps=[
    (
        'clf',
        LinearSVC(
            penalty='l1',
            dual=False,
            C=0.1,
            class_weight="balanced",
            max_iter=1000,
            random_state=0,
        ),
    ),
])

# Search space for the randomized hyperparameter search. Every parameter
# lists exactly one candidate value.
params = dict(clf__penalty=['l1'], clf__C=[0.1])

seed = 551    # constant seed for repeatability
num_iter = 1  # passed to the search as n_iter
cv = 5        # number of cross-validation folds
pipe = pipe_LinearSVC
# The chunk list maps a run index to the label-column indices that run should
# process; GO_idx selects this run's entry.
filename = 'sift_data/sift_GO_chunk.pkl'
with open(filename, 'rb') as handle:
    chunk_list = pickle.load(handle)
idx = chunk_list[GO_idx]

# NOTE(review): inputs are read from 'sift_data/' while results are written
# under 'data/sift_data/pred/' -- confirm the differing prefix is intended.
filename = 'data/sift_data/pred/pred_GO%s.pkl' % (str(GO_idx))
# Create the output directory before the fitting starts, so a missing
# directory cannot discard the results when they are written at the end.
os.makedirs(os.path.dirname(filename), exist_ok=True)

X = features
scores = []
for i in idx:
    # Column i of the label matrix is the target for this term.
    Y = GO_term_array[:, i]
    random_search = RandomizedSearchCV(
        pipe,
        param_distributions=params,
        scoring='roc_auc',
        cv=cv,
        verbose=10,
        random_state=seed,
        n_iter=num_iter,
    )
    random_search.fit(X, Y)

    # Keep only the mean cross-validated test score per sampled candidate,
    # converted to plain Python floats for pickling.
    results = random_search.cv_results_
    data = {"mean_test_score": results['mean_test_score'].tolist()}
    print(data["mean_test_score"])
    scores.append([data, GO_terms[i]])

# One [score-dict, GO term] pair per processed label column.
with open(filename, 'wb') as handle:
    pickle.dump(scores, handle)