-
Notifications
You must be signed in to change notification settings - Fork 0
/
calc_ssv2.py
91 lines (76 loc) · 2.67 KB
/
calc_ssv2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import argparse
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
# Command-line interface.  Parsing happens at import time so that ``args`` is
# available as a module-level global to ``main()``.
parser = argparse.ArgumentParser(
    description='Compute mAP and R@1/R@5/R@10 from per-(video, caption) scores.'
)
parser.add_argument(
    '--input_file_1',
    type=str,
    required=True,
    help='ground-truth CSV with a header row; columns: videopath, caption, match',
)
parser.add_argument(
    '--input_file_2',
    type=str,
    required=True,
    help='prediction CSV without a header row; columns in order: '
         'videopath, caption, entailment, pos, neg',
)
parser.add_argument(
    '--vid_per_caption',
    type=int,
    default=588,
    help='number of videos expected per caption; captions whose merged row '
         'count differs are skipped',
)
args = parser.parse_args()
def create_labels(labels):
    """Convert rows of 0/1 flags into the indices of their positive entries.

    Args:
        labels: a sequence of rows; each row is a sequence whose entries are
            compared against ``1``.

    Returns:
        ``np.squeeze`` of an array holding, for every row, the list of column
        indices ``j`` with ``row[j] == 1``.  When every row has the same
        number of positives the result is a regular numeric array (singleton
        axes removed).  When the counts differ, a 1-D object array of Python
        lists is returned.
    """
    positives = [
        [col for col, flag in enumerate(row) if flag == 1]
        for row in labels
    ]

    if len({len(indices) for indices in positives}) > 1:
        # Ragged rows: ``np.array`` on a ragged nested list raises ValueError
        # on NumPy >= 1.24, so build the object array explicitly (this is the
        # value older NumPy releases produced implicitly).
        res = np.empty(len(positives), dtype=object)
        for row_idx, indices in enumerate(positives):
            res[row_idx] = indices
    else:
        res = np.array(positives)
    return np.squeeze(res)
def _average_precision(ranked_labels):
    """Return the average precision of one ranked label vector.

    ``ranked_labels`` holds ground-truth labels ordered from the
    highest-scored candidate to the lowest; entries equal to 1 are hits.
    Precision is sampled at every hit position and averaged.  A vector
    without any hit yields 0.0 instead of dividing by zero.
    """
    hit_positions = np.flatnonzero(np.asarray(ranked_labels) == 1).tolist()
    if not hit_positions:
        return 0.0
    precisions = [
        hits / (position + 1)
        for hits, position in enumerate(hit_positions, start=1)
    ]
    return sum(precisions) / len(precisions)


def _hit_at_k(ranked_labels, k):
    """Return 1 if any of the first ``k`` ranked labels equals 1, else 0."""
    return int((np.asarray(ranked_labels)[:k] == 1).any())


def main():
    """Merge ground truth with predictions and print mAP, R@1, R@5 and R@10.

    Reads ``args.input_file_1`` (columns videopath, caption, match) and
    ``args.input_file_2`` (header-less; videopath, caption, entailment, pos,
    neg), inner-joins them on (videopath, caption) and, for every caption
    with exactly ``args.vid_per_caption`` rows, ranks its videos by
    descending ``entailment`` and scores the ranking against ``match``.

    Raises:
        ValueError: if no caption has exactly ``args.vid_per_caption`` rows
            after the merge, so there is nothing to evaluate.
    """
    df1 = pd.read_csv(args.input_file_1)  # videopath, caption, match
    print(len(df1))
    print(df1.head())
    df2 = pd.read_csv(
        args.input_file_2,
        names=['videopath', 'caption', 'entailment', 'pos', 'neg'],
    )
    print(len(df2))
    print(df2.head())
    df = pd.merge(df1, df2, on=['videopath', 'caption'], how='inner')
    print(len(df))
    # Sorting once is enough: groupby keeps the row order inside each group.
    df = df.sort_values(by=['videopath', 'caption'])

    predictions = []
    labels = []
    for _, cap_df in df.groupby('caption'):
        # Only captions scored against the full candidate set are evaluated.
        if len(cap_df) == args.vid_per_caption:
            predictions.append(cap_df['entailment'].tolist())
            labels.append(cap_df['match'].tolist())
    if not predictions:
        raise ValueError(
            f'no caption has exactly {args.vid_per_caption} rows after merging; '
            'check --vid_per_caption and the input files'
        )
    predictions = np.array(predictions)
    labels = np.array(labels)
    print(len(labels))
    print(sum(labels[0]))

    ap_scores = []
    r1_hits = []
    r5_hits = []
    r10_hits = []
    # tqdm must be the first zip argument so it is exhausted and closes.
    for pred, gold in zip(tqdm(predictions), labels):
        # Ground-truth labels reordered from best- to worst-scored video.
        ranked = gold[np.argsort(-pred)]
        ap_scores.append(_average_precision(ranked))
        # Slicing lets R@k work when fewer than k candidates exist.
        r1_hits.append(_hit_at_k(ranked, 1))
        r5_hits.append(_hit_at_k(ranked, 5))
        r10_hits.append(_hit_at_k(ranked, 10))

    mean_ap = sum(ap_scores) / len(ap_scores)
    r1 = sum(r1_hits) / len(r1_hits)
    r5 = sum(r5_hits) / len(r5_hits)
    r10 = sum(r10_hits) / len(r10_hits)
    print(f'mAP: {100 * mean_ap} | R@1: {100 * r1} | R@5: {100 * r5} | R@10: {100 * r10}')


if __name__ == '__main__':
    main()