-
Notifications
You must be signed in to change notification settings - Fork 42
/
Copy pathmetrics.py
57 lines (49 loc) · 2.24 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import math
import pandas as pd
class MetronAtK(object):
    """Hit Ratio and NDCG at a top-K cutoff for ranked recommendation scores.

    Usage: construct with the cutoff, assign the evaluation data through the
    ``subjects`` property, then call ``cal_hit_ratio`` / ``cal_ndcg``.
    """

    def __init__(self, top_k):
        """
        args:
            top_k: rank cutoff; only items ranked ``<= top_k`` count as hits
        """
        self._top_k = top_k
        self._subjects = None  # Subjects which we ran evaluation on

    @property
    def top_k(self):
        """Rank cutoff used by the metrics."""
        return self._top_k

    @top_k.setter
    def top_k(self, top_k):
        self._top_k = top_k

    @property
    def subjects(self):
        """Ranked evaluation frame built by the setter (None until assigned)."""
        return self._subjects

    @subjects.setter
    def subjects(self, subjects):
        """Build the per-user ranked frame the metrics are computed from.

        args:
            subjects: list, [test_users, test_items, test_scores,
                negative users, negative items, negative scores].
                Each entry is a sequence; entries are copied into plain lists,
                so tuples or array-like columns are accepted as well as lists.

        The stored frame has one row per scored (user, item) pair with the
        columns user, item, score, test_item, test_score and rank, sorted by
        user and then rank.

        NOTE: the golden set is joined on 'user' only, so this assumes one
        golden item per user; several golden rows for the same user would
        duplicate that user's rows in the join.
        """
        # Kept as an assert so callers still see AssertionError on misuse.
        assert isinstance(subjects, list), 'subjects must be a list of six sequences'
        # Copy every column into a list: `+` below must concatenate, which it
        # only does for lists/tuples (array-likes would add element-wise).
        test_users, test_items, test_scores = (
            list(subjects[0]), list(subjects[1]), list(subjects[2]))
        neg_users, neg_items, neg_scores = (
            list(subjects[3]), list(subjects[4]), list(subjects[5]))
        # the golden set
        test = pd.DataFrame({'user': test_users,
                             'test_item': test_items,
                             'test_score': test_scores})
        # the full set: negatives first, then the golden items
        full = pd.DataFrame({'user': neg_users + test_users,
                             'item': neg_items + test_items,
                             'score': neg_scores + test_scores})
        # attach each user's golden item to every row of that user
        full = pd.merge(full, test, on=['user'], how='left')
        # rank the items according to the scores for each user (1 = best);
        # method='first' breaks score ties by order of appearance
        full['rank'] = full.groupby('user')['score'].rank(method='first', ascending=False)
        full.sort_values(['user', 'rank'], inplace=True)
        self._subjects = full

    def _hits_in_top_k(self):
        """Return the rows where a user's golden item is ranked within top_k."""
        full = self._subjects
        within_cutoff = full[full['rank'] <= self._top_k]
        return within_cutoff[within_cutoff['test_item'] == within_cutoff['item']]

    def cal_hit_ratio(self):
        """Hit Ratio @ top_K

        returns:
            number of golden items ranked within top_k divided by the number
            of distinct users; 0.0 when there are no users at all.
        """
        n_users = self._subjects['user'].nunique()
        if n_users == 0:
            # nothing was evaluated; avoid dividing by zero
            return 0.0
        return len(self._hits_in_top_k()) * 1.0 / n_users

    def cal_ndcg(self):
        """NDCG @ top_K

        Each golden item ranked within top_k contributes
        log(2) / log(1 + rank), i.e. 1 / log2(1 + rank); the contributions are
        summed and divided by the number of distinct users.

        returns:
            the averaged gain; 0.0 when there are no users at all.
        """
        n_users = self._subjects['user'].nunique()
        if n_users == 0:
            # nothing was evaluated; avoid dividing by zero
            return 0.0
        hits = self._hits_in_top_k()
        # the rank starts from 1, so the best possible gain is exactly 1
        gains = hits['rank'].apply(lambda rank: math.log(2) / math.log(1 + rank))
        return gains.sum() * 1.0 / n_users