-
Notifications
You must be signed in to change notification settings - Fork 7
/
tune_link_pred.py
113 lines (89 loc) · 3.38 KB
/
tune_link_pred.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
"""
version 1.0
date 2021/02/04
"""
import argparse
import random
import numpy as np
import optuna
import torch
from numpy import mean
from tqdm import tqdm
from models import VGAEmf
from train import LinkPredTrainer
from utils import LinkPredData, apply_mask, generate_mask
# --- Command-line configuration for the link-prediction tuning experiment ---
parser = argparse.ArgumentParser()
parser.add_argument('--dataset',
                    default='cora',
                    choices=['cora', 'citeseer'],
                    help='dataset name')
parser.add_argument('--type',
                    default='uniform',
                    choices=['uniform', 'bias', 'struct'],
                    help="uniform randomly missing, biased randomly missing, and structurally missing")
parser.add_argument('--rate', default=0.1, type=float, help='missing rate')
parser.add_argument('--nhid', default=32, type=int, help='the number of hidden units')
parser.add_argument('--latent_dim', default=16, type=int, help='the dimension of latent variables')
parser.add_argument('--ncomp', default=5, type=int, help='the number of Gaussian components')
parser.add_argument('--epoch', default=1000, type=int, help='the number of training epochs')
parser.add_argument('--seed', default=17, type=int)
args = parser.parse_args()
# Optuna search budget: stop after TRIAL_SIZE trials or 3 hours, whichever hits first.
TRIAL_SIZE = 100
TIMEOUT = 60 * 60 * 3
# Seed every RNG in play (Python, NumPy, Torch CPU and CUDA) so runs are reproducible.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)
torch.backends.cudnn.deterministic = True
# Echo the experimental setup to stdout.
print(args.dataset, args.type, args.rate)
print("num of components:", args.ncomp)
print("nhid:", args.nhid)
print("epochs:", args.epoch)
# generate all masks for the experiment
# Pre-generate five feature masks up front so hyperparameter tuning (mask 0)
# and final evaluation (all masks) share identical missingness patterns.
tmpdata = LinkPredData(args.dataset)
masks = [generate_mask(tmpdata.features, args.rate, args.type) for _ in range(5)]
def objective(trial):
    """Optuna objective: train on the first mask, return negated validation AUC."""
    # Search space: dropout rate, learning rate, and L2 penalty.
    dropout = trial.suggest_uniform('dropout', 0., 0.1)
    lr = trial.suggest_loguniform('lr', 5e-4, 2e-2)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)

    # Build the masked dataset and the model under evaluation. Tuning always
    # uses masks[0]; the remaining masks are reserved for final evaluation.
    data = LinkPredData(args.dataset, seed=args.seed)
    apply_mask(data.features, masks[0])
    model = VGAEmf(data, args.nhid, args.latent_dim, dropout, args.ncomp)

    trainer = LinkPredTrainer(
        data, model,
        {'lr': lr, 'weight_decay': weight_decay, 'epochs': args.epoch},
        niter=10)
    # Optuna minimizes by default, so negate the AUC to maximize it.
    return -trainer.run()['val_auc']
def tune_hyperparams():
    """Run an Optuna study over ``objective`` and return the best params dict."""
    search = optuna.create_study()
    search.optimize(objective, n_trials=TRIAL_SIZE, timeout=TIMEOUT)
    return search.best_params
def evaluate_model(hyperparams):
    """Retrain with the chosen hyperparameters on every mask; return mean test AUC."""
    scores = []
    for m in tqdm(masks):
        # Fresh copy of the data for each mask; apply_mask converts the
        # masked entries of the feature matrix to nan.
        data = LinkPredData(args.dataset, seed=args.seed)
        apply_mask(data.features, m)
        model = VGAEmf(data, args.nhid, args.latent_dim,
                       hyperparams['dropout'], args.ncomp)
        trainer = LinkPredTrainer(
            data, model,
            {'lr': hyperparams['lr'],
             'weight_decay': hyperparams['weight_decay'],
             'epochs': args.epoch},
            niter=20)
        scores.append(trainer.run()['test_auc'])
    return mean(scores)
def main():
    """Tune hyperparameters, evaluate the best setting, and print the score."""
    best = tune_hyperparams()
    print(evaluate_model(best))


if __name__ == '__main__':
    main()