-
Notifications
You must be signed in to change notification settings - Fork 0
/
search_modes.py
124 lines (72 loc) · 4.73 KB
/
search_modes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import elastic_func
from sklearn import preprocessing
import numpy as np
def my_score_a(movie):
    """
    Attach a "Final Score" to a single result row.

    The score is the arithmetic mean of every entry from position 2 onward;
    the first two positions are skipped (the callers in this module lay rows
    out as ``movieId, title, score, ...``). Missing entries are ignored.

    :param movie: pandas Series holding one row, as handed over by
        ``DataFrame.apply(..., axis=1)``.
    :return: dict with every original field of ``movie`` plus the key
        ``"Final Score"``; the score is NaN when no component is present.
    """
    # dropna() treats NaN and None alike, unlike comparing str(value) to
    # 'nan', which lets None through and makes the mean raise.
    present = movie.iloc[2:].dropna()
    if present.empty:
        # Avoid numpy's "Mean of empty slice" RuntimeWarning.
        final_score = np.nan
    else:
        final_score = np.mean(present.to_numpy(dtype=float))
    return {**movie, "Final Score": final_score}
def private(title, es, index, n):
    """
    Return the search hits ranked purely by their search score.

    No user or review data is involved: the frame produced by
    ``elastic_func.search_to_dataframe`` is reduced to the ``title`` and
    ``score`` columns and ordered from highest to lowest score (the original
    author describes this score as the BM25 metric).
    """
    hits = elastic_func.search_to_dataframe(es, index, title, n)
    ranked = hits[["title", "score"]]
    return ranked.sort_values(by="score", ascending=False)
def basic(title, user_id, es, index, n, reviews):
    """
    Return search hits ranked by search score blended with review ratings.

    Three components are combined per hit: the search ``score`` (the BM25
    metric, per the original author), the requesting user's own rating
    (``u_r``) and the mean rating over all users (``m_r``). Each component is
    min-max scaled across the hits, then averaged by ``my_score_a``, which
    skips components that are missing for a given hit.

    :param title: query text forwarded to ``elastic_func.search_to_dataframe``.
    :param user_id: value matched against ``reviews["userId"]``.
    :param es: search client forwarded to ``elastic_func``.
    :param index: index name forwarded to ``elastic_func``.
    :param n: hit count forwarded to ``elastic_func``.
    :param reviews: DataFrame with ``userId``, ``movieId`` and ``rating``
        columns.
    :return: DataFrame with ``title`` and ``Final Score`` columns, sorted by
        ``Final Score`` in descending order.
    """
    results = elastic_func.search_to_dataframe(es, index, title, n)

    # Keep only the columns needed for the merge so unrelated review columns
    # cannot collide with columns of the search results.
    user_ratings = reviews.loc[
        reviews["userId"] == user_id, ["movieId", "rating"]
    ].rename(columns={"rating": "u_r"})

    # Average only the rating column: averaging every column raises on
    # pandas >= 2.0 as soon as ``reviews`` carries a non-numeric column.
    hit_reviews = reviews[reviews["movieId"].isin(results["movieId"].values)]
    mean_ratings = (
        hit_reviews.groupby("movieId")["rating"]
        .mean()
        .reset_index()
        .rename(columns={"rating": "m_r"})
    )

    results = results[["movieId", "title", "score"]]
    results = results.merge(user_ratings, how="left", on="movieId")
    results = results.merge(mean_ratings, how="left", on="movieId")

    # Everything after (movieId, title) is a score component; bring all of
    # them onto a common scale before averaging.
    component_cols = results.columns[2:]
    scaler = preprocessing.MinMaxScaler()
    results[component_cols] = scaler.fit_transform(results[component_cols])

    final_results = results.apply(my_score_a, result_type="expand", axis=1)
    return final_results[["title", "Final Score"]].sort_values(
        by="Final Score", ascending=False
    )
def advance(title, user_id, es, index, n, reviews, cluster_reviews, user_cluster):
    """
    Return search hits ranked by search score, review ratings and the
    rating of the user's cluster.

    Four components are combined per hit: the search ``score``, the
    requesting user's own rating (``u_r``), the mean rating over all users
    (``m_r``) and the rating stored for the user's cluster (``c_r``). Each
    component is min-max scaled across the hits, then averaged by
    ``my_score_a``, which skips components missing for a given hit.

    :param title: query text forwarded to ``elastic_func.search_to_dataframe``.
    :param user_id: value matched against ``reviews["userId"]``.
    :param es: search client forwarded to ``elastic_func``.
    :param index: index name forwarded to ``elastic_func``.
    :param n: hit count forwarded to ``elastic_func``.
    :param reviews: DataFrame with ``userId``, ``movieId`` and ``rating``
        columns.
    :param cluster_reviews: DataFrame with a ``class`` column and one further
        column per movie, whose labels must be castable to int64 movie ids.
        It is not modified.
    :param user_cluster: the ``class`` value whose row supplies ``c_r``.
    :return: DataFrame with ``title`` and ``Final Score`` columns, sorted by
        ``Final Score`` in descending order.
    """
    results = elastic_func.search_to_dataframe(es, index, title, n)

    # Keep only the columns needed for the merge so unrelated review columns
    # cannot collide with columns of the search results.
    user_ratings = reviews.loc[
        reviews["userId"] == user_id, ["movieId", "rating"]
    ].rename(columns={"rating": "u_r"})

    # Average only the rating column: averaging every column raises on
    # pandas >= 2.0 as soon as ``reviews`` carries a non-numeric column.
    hit_reviews = reviews[reviews["movieId"].isin(results["movieId"].values)]
    mean_ratings = (
        hit_reviews.groupby("movieId")["rating"]
        .mean()
        .reset_index()
        .rename(columns={"rating": "m_r"})
    )

    # set_index returns a new frame, so the caller's cluster_reviews keeps its
    # own index. After transposing, rows are movies and columns are classes.
    cluster_ratings = (
        cluster_reviews.set_index("class")
        .T.reset_index()
        .rename(columns={"index": "movieId", user_cluster: "c_r"})
    )
    cluster_ratings["movieId"] = cluster_ratings["movieId"].astype("int64")

    results = results[["movieId", "title", "score"]]
    results = results.merge(user_ratings, how="left", on="movieId")
    results = results.merge(mean_ratings, how="left", on="movieId")
    results = results.merge(
        cluster_ratings[["movieId", "c_r"]], on="movieId", how="left"
    )

    # Everything after (movieId, title) is a score component; bring all of
    # them onto a common scale before averaging.
    component_cols = results.columns[2:]
    scaler = preprocessing.MinMaxScaler()
    results[component_cols] = scaler.fit_transform(results[component_cols])

    final_results = results.apply(my_score_a, result_type="expand", axis=1)
    return final_results[["title", "Final Score"]].sort_values(
        by="Final Score", ascending=False
    )
def cambridge_analytica(title, user_id, es, index, n, reviews, cluster_reviews, user_cluster, predictions):
    """
    Return search hits ranked by search score, review ratings, cluster
    rating and the predicted rating for the user.

    Five components are combined per hit: the search ``score``, the
    requesting user's own rating (``u_r``), the mean rating over all users
    (``m_r``), the rating stored for the user's cluster (``c_r``) and the
    predicted rating for the user (``p_r``). Each component is min-max scaled
    across the hits, then averaged by ``my_score_a``, which skips components
    missing for a given hit.

    :param title: query text forwarded to ``elastic_func.search_to_dataframe``.
    :param user_id: value matched against ``reviews["userId"]`` and against
        ``predictions["userId"]``.
    :param es: search client forwarded to ``elastic_func``.
    :param index: index name forwarded to ``elastic_func``.
    :param n: hit count forwarded to ``elastic_func``.
    :param reviews: DataFrame with ``userId``, ``movieId`` and ``rating``
        columns.
    :param cluster_reviews: DataFrame with a ``class`` column and one further
        column per movie, whose labels must be castable to int64 movie ids.
        It is not modified.
    :param user_cluster: the ``class`` value whose row supplies ``c_r``.
    :param predictions: DataFrame with a ``userId`` column and one further
        column per movie, whose labels must be castable to int64 movie ids.
        It is not modified.
    :return: DataFrame with ``title`` and ``Final Score`` columns, sorted by
        ``Final Score`` in descending order.
    """
    results = elastic_func.search_to_dataframe(es, index, title, n)

    # Keep only the columns needed for the merge so unrelated review columns
    # cannot collide with columns of the search results.
    user_ratings = reviews.loc[
        reviews["userId"] == user_id, ["movieId", "rating"]
    ].rename(columns={"rating": "u_r"})

    # Average only the rating column: averaging every column raises on
    # pandas >= 2.0 as soon as ``reviews`` carries a non-numeric column.
    hit_reviews = reviews[reviews["movieId"].isin(results["movieId"].values)]
    mean_ratings = (
        hit_reviews.groupby("movieId")["rating"]
        .mean()
        .reset_index()
        .rename(columns={"rating": "m_r"})
    )

    # set_index returns a new frame, so the caller's cluster_reviews keeps its
    # own index. After transposing, rows are movies and columns are classes.
    cluster_ratings = (
        cluster_reviews.set_index("class")
        .T.reset_index()
        .rename(columns={"index": "movieId", user_cluster: "c_r"})
    )
    cluster_ratings["movieId"] = cluster_ratings["movieId"].astype("int64")

    # Same reshaping for the predictions: rows become movies, columns users.
    predicted = (
        predictions.set_index("userId")
        .T.reset_index()
        .rename(columns={"index": "movieId", user_id: "p_r"})
    )
    predicted["movieId"] = predicted["movieId"].astype("int64")
    # Truncate toward zero, then halve (same result as int(x) / 2 for finite
    # values) while letting a missing prediction stay NaN instead of raising.
    # NOTE(review): the truncation is kept from the original; presumably the
    # predictions are on a scale twice that of the ratings - confirm.
    predicted["p_r"] = np.trunc(predicted["p_r"].astype(float)) / 2

    results = results[["movieId", "title", "score"]]
    results = results.merge(user_ratings, how="left", on="movieId")
    results = results.merge(mean_ratings, how="left", on="movieId")
    results = results.merge(
        cluster_ratings[["movieId", "c_r"]], on="movieId", how="left"
    )
    results = results.merge(predicted[["movieId", "p_r"]], on="movieId", how="left")

    # Everything after (movieId, title) is a score component; bring all of
    # them onto a common scale before averaging.
    component_cols = results.columns[2:]
    scaler = preprocessing.MinMaxScaler()
    results[component_cols] = scaler.fit_transform(results[component_cols])

    final_results = results.apply(my_score_a, result_type="expand", axis=1)
    return final_results[["title", "Final Score"]].sort_values(
        by="Final Score", ascending=False
    )