Source: dataset_featuriser_old.py (forked from JerryJohnThomas/OELP_code) — 100 lines (85 loc), 3.47 KB.
#%% Loss and Optimizer
# Class weights: index-1 class weighted 10x relative to index-0.
# NOTE(review): presumably counteracts class imbalance in a binary task — confirm.
weights = torch.tensor([1, 10], dtype=torch.float32).to(device)
loss_fn = torch.nn.CrossEntropyLoss(weight=weights)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
# Multiply the learning rate by 0.95 each time scheduler.step() is called.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

#%% Prepare training
NUM_GRAPHS_PER_BATCH = 256
train_loader = DataLoader(train_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True)
def train(epoch):
    """Run one training epoch over ``train_loader``.

    Performs forward/backward/step per batch, collects per-batch predictions
    and labels to report epoch-level metrics via ``calculate_metrics``, and
    returns the mean (detached) batch loss for the epoch.

    Fix: the original returned only the LAST batch's loss, so the logged
    "Train loss" was a noisy single-batch sample rather than an epoch figure.
    The caller only does ``loss.detach().cpu().numpy()`` on the result, which
    still works on the detached mean tensor returned here.
    """
    all_preds = []
    all_labels = []
    running_loss = torch.tensor(0.0)
    num_batches = 0
    for _, batch in enumerate(tqdm(train_loader)):
        # Move the graph batch to the training device.
        batch.to(device)
        # Reset gradients accumulated from the previous step.
        optimizer.zero_grad()
        # Forward pass: node features, edge features, connectivity, batch index.
        pred = model(batch.x.float(),
                     batch.edge_attr.float(),
                     batch.edge_index,
                     batch.batch)
        # NOTE(review): sqrt of cross-entropy is unusual; kept as-is because it
        # rescales gradients and the training dynamics may depend on it.
        loss = torch.sqrt(loss_fn(pred, batch.y))
        loss.backward()
        optimizer.step()
        running_loss = running_loss + loss.detach().cpu()
        num_batches += 1
        all_preds.append(np.argmax(pred.cpu().detach().numpy(), axis=1))
        all_labels.append(batch.y.cpu().detach().numpy())
    all_preds = np.concatenate(all_preds).ravel()
    all_labels = np.concatenate(all_labels).ravel()
    calculate_metrics(all_preds, all_labels, epoch, "train")
    # Mean over batches; guard against an empty loader.
    return running_loss / max(num_batches, 1)
def test(epoch):
    """Evaluate the model over ``test_loader`` and return the mean batch loss.

    Fixes relative to the original:
    - Wraps evaluation in ``torch.no_grad()``: the original built autograd
      graphs during evaluation, wasting memory and compute for gradients that
      were never used.
    - Returns the mean loss over all batches instead of only the last batch's
      loss, so the logged "Test loss" reflects the whole test set. The caller
      only does ``loss.detach().cpu().numpy()``, which still works here.
    """
    all_preds = []
    all_labels = []
    running_loss = torch.tensor(0.0)
    num_batches = 0
    with torch.no_grad():
        for batch in test_loader:
            batch.to(device)
            # Forward pass: node features, edge features, connectivity, batch index.
            pred = model(batch.x.float(),
                         batch.edge_attr.float(),
                         batch.edge_index,
                         batch.batch)
            # Same (unusual) sqrt-of-cross-entropy objective as train().
            loss = torch.sqrt(loss_fn(pred, batch.y))
            running_loss = running_loss + loss.cpu()
            num_batches += 1
            all_preds.append(np.argmax(pred.cpu().numpy(), axis=1))
            all_labels.append(batch.y.cpu().numpy())
    all_preds = np.concatenate(all_preds).ravel()
    all_labels = np.concatenate(all_labels).ravel()
    calculate_metrics(all_preds, all_labels, epoch, "test")
    # Mean over batches; guard against an empty loader.
    return running_loss / max(num_batches, 1)
def calculate_metrics(y_pred, y_true, epoch, type):
    """Print classification metrics and log ROC-AUC to MLflow.

    Bug fix: every sklearn metric takes ``(y_true, y_pred)``, but the original
    passed ``(y_pred, y_true)``. That transposes the confusion matrix, swaps
    precision and recall for the positive class, and reverses the roles of
    labels and scores in ``roc_auc_score``. Arguments are now passed in
    sklearn's documented order. (The parameter ``type`` shadows the builtin
    but is kept to preserve the signature for existing callers.)
    """
    print(f"\n Confusion matrix: \n {confusion_matrix(y_true, y_pred)}")
    print(f"F1 Score: {f1_score(y_true, y_pred)}")
    print(f"Accuracy: {accuracy_score(y_true, y_pred)}")
    print(f"Precision: {precision_score(y_true, y_pred)}")
    print(f"Recall: {recall_score(y_true, y_pred)}")
    try:
        roc = roc_auc_score(y_true, y_pred)
        print(f"ROC AUC: {roc}")
        mlflow.log_metric(key=f"ROC-AUC-{type}", value=float(roc), step=epoch)
    except ValueError:
        # roc_auc_score raises ValueError when only one class is present in
        # y_true; log 0 as a sentinel. (Was a bare except that hid all errors.)
        mlflow.log_metric(key=f"ROC-AUC-{type}", value=float(0), step=epoch)
        print(f"ROC AUC: notdefined")
# %% Run the training
# 500-epoch loop: train every epoch, evaluate every 5th, decay the LR after
# each epoch, and stream losses to MLflow under a single run.
with mlflow.start_run() as run:
    for epoch in range(500):
        # --- training phase ---
        model.train()
        loss = train(epoch=epoch)
        loss = loss.detach().cpu().numpy()
        print(f"Epoch {epoch} | Train Loss {loss}")
        mlflow.log_metric(key="Train loss", value=float(loss), step=epoch)

        # --- evaluation phase (every fifth epoch only) ---
        model.eval()
        if epoch % 5 == 0:
            loss = test(epoch=epoch)
            loss = loss.detach().cpu().numpy()
            print(f"Epoch {epoch} | Test Loss {loss}")
            mlflow.log_metric(key="Test loss", value=float(loss), step=epoch)

        # Apply the exponential LR decay once per epoch.
        scheduler.step()

print("Done.")

# %% Save the model
mlflow.pytorch.log_model(model, "model")