evaluate.py
import os
import argparse
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
import yaml
from sklearn.metrics import accuracy_score, confusion_matrix, log_loss, classification_report
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sn
from dataset import SceneDataset
from mean_model import load_model
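
# Share tensors via the file system so many DataLoader workers do not hit
# the open file descriptor limit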
torch.multiprocessing.set_sharing_strategy('file_system')
parser = argparse.ArgumentParser(description='evaluation')
parser.add_argument('--experiment_path', type=str, required=True)
parser.add_argument('--cuda', type=int, default=0, required=False,
                    help='set the cuda device')
args = parser.parse_args()
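
# Load the training-time configuration stored in the experiment directory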
with open(os.path.join(args.experiment_path, "config.yaml"), "r") as reader:
    config = yaml.load(reader, Loader=yaml.FullLoader)
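
# Global mean/std statistics for standardizing audio and video features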
mean_std_audio = np.load(config["data"]["audio_norm"])
mean_std_video = np.load(config["data"]["video_norm"])
mean_audio = mean_std_audio["global_mean"]
std_audio = mean_std_audio["global_std"]
mean_video = mean_std_video["global_mean"]
std_video = mean_std_video["global_std"]
audio_transform = lambda x: (x - mean_audio) / std_audio
video_transform = lambda x: (x - mean_video) / std_video
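
# Test split yielding normalized audio/video feature pairs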
tt_ds = SceneDataset(config["data"]["test"]["audio_feature"],
                     config["data"]["test"]["video_feature"],
                     audio_transform,
                     video_transform)
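
# Override the training batch size: evaluate one clip at a time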
config["data"]["dataloader_args"]["batch_size"] = 1
tt_dataloader = DataLoader(tt_ds, shuffle=False, **config["data"]["dataloader_args"])
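
# Instantiate the model from its config and restore the best checkpoint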
model_cfg = config['model']
model = load_model(config['model_name'])(**model_cfg)
# Fall back to CPU when CUDA is unavailable
device = torch.device("cuda:{}".format(args.cuda) if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load(
    os.path.join(args.experiment_path, "best_model.pt"), map_location="cpu"))
model = model.to(device).eval()
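
# Accumulate targets, class probabilities, predictions and audio ids over the test set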
targets = []
probs = []
preds = []
aids = []
with torch.no_grad():
    tt_dataloader = tqdm(tt_dataloader)
    for batch_idx, batch in enumerate(tt_dataloader):
        audio_feat = batch["audio_feat"].to(device)
        video_feat = batch["video_feat"].to(device)
        target = batch["target"].to(device)
        logit = model(audio_feat, video_feat)
        pred = torch.argmax(logit, 1)
        targets.append(target.cpu().numpy())
        probs.append(torch.softmax(logit, 1).cpu().numpy())
        preds.append(pred.cpu().numpy())
        aids.append(np.array(batch["aid"]))
targets = np.concatenate(targets, axis=0)
preds = np.concatenate(preds, axis=0)
probs = np.concatenate(probs, axis=0)
aids = np.concatenate(aids, axis=0)
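
# Write evaluation outputs (metrics, predictions, confusion matrix) to the experiment directory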
writer = open(os.path.join(args.experiment_path, "result.txt"), "w")
cm = confusion_matrix(targets, preds)
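
# Scene class names; the order must match the label indices used during training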
keys = ['airport',
        'bus',
        'metro',
        'metro_station',
        'park',
        'public_square',
        'shopping_mall',
        'street_pedestrian',
        'street_traffic',
        'tram']
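
# Map integer predictions and targets back to scene names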
scenes_pred = [keys[pred] for pred in preds]
scenes_label = [keys[target] for target in targets]
pred_dict = {"aid": aids, "scene_pred": scenes_pred, "scene_label": scenes_label}
for idx, key in enumerate(keys):
    pred_dict[key] = probs[:, idx]
pd.DataFrame(pred_dict).to_csv(os.path.join(args.experiment_path, "prediction.csv"),
                               index=False,
                               sep="\t",
                               float_format="%.3f")
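
# Per-class precision, recall and F1 written to result.txt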
print(classification_report(targets, preds, target_names=keys), file=writer)
df_cm = pd.DataFrame(cm.astype('float') / cm.sum(axis=1)[:, np.newaxis],
                     index=keys, columns=keys)
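# Plot the row-normalized confusion matrix as a heatmap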
plt.figure(figsize=(15, 12))
sn.heatmap(df_cm, annot=True)
plt.savefig(os.path.join(args.experiment_path, 'cm.png'))
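
# Summary metrics: overall accuracy and multi-class log loss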
acc = accuracy_score(targets, preds)
print('', file=writer)
print(f'accuracy: {acc:.3f}', file=writer)
logloss = log_loss(targets, probs)
print(f'overall log loss: {logloss:.3f}', file=writer)
writer.close()
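
# Example usage (the experiment path below is hypothetical):
#   python evaluate.py --experiment_path experiments/my_run --cuda 0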