-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathuser_base.py
78 lines (70 loc) · 2.79 KB
/
user_base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# --- Import Libraries --- #
import pandas as pd
from scipy.spatial.distance import cosine
import time
from timeit import default_timer as timer
# Log the current UTC wall-clock time and start the elapsed-time timer.
print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))
start = timer()

# --- Read Data --- #
data = pd.read_csv('data.csv')

# --- Start Item Based Recommendations --- #
# Drop the "user" column so that only the item columns remain.
# `axis` is passed by keyword: pandas 2.0 no longer accepts it positionally.
data_germany = data.drop('user', axis=1)

# Create a placeholder dataframe listing item vs. item.
data_ibs = pd.DataFrame(index=data_germany.columns,
                        columns=data_germany.columns)

# Let's fill in those empty cells with cosine similarities.
# NOTE: the original author recorded "avg 486 secs" for the full pairwise
# loop, which re-sliced both columns and computed every (i, j) pair twice.
# Pull each item column out once so the inner loop works on plain arrays.
item_vectors = [data_germany.iloc[:, k].values
                for k in range(len(data_germany.columns))]

for i, vec_i in enumerate(item_vectors):
    # Diagonal cell: similarity of the item with itself.
    data_ibs.iloc[i, i] = 1 - cosine(vec_i, vec_i)
    # Cosine similarity is symmetric, so compute only the upper triangle
    # and mirror each value into the lower triangle.
    for j in range(i + 1, len(item_vectors)):
        similarity = 1 - cosine(vec_i, item_vectors[j])
        data_ibs.iloc[i, j] = similarity
        data_ibs.iloc[j, i] = similarity

end = timer()
print("Time taken:", end-start)
#
# # Create a placeholder for the closest neighbours of each item
# data_neighbours = pd.DataFrame(index=data_ibs.columns,columns=[range(1,11)])
#
# # Loop through our similarity dataframe and fill in neighbouring item names
# for i in range(0, len(data_ibs.columns)):
# data_neighbours.ix[i, :10] = data_ibs.ix[0:, i].sort_values(ascending=False)[:10].index
#
# # --- End Item Based Recommendations --- #
#
# # --- Start User Based Recommendations --- #
#
# # Helper function to get similarity scores
# def getScore(history, similarities):
# return sum(history*similarities)/sum(similarities)
#
# # Create a placeholder matrix for similarities, and fill in the user name column
# data_sims = pd.DataFrame(index=data.index,columns=data.columns)
# data_sims.ix[:,:1] = data.ix[:,:1]
#
# #Loop through all rows, skip the user column, and fill with similarity scores
# for i in range(0,len(data_sims.index)):
# for j in range(1,len(data_sims.columns)):
# user = data_sims.index[i]
# product = data_sims.columns[j]
#
# if data.ix[i][j] == 1:
# data_sims.ix[i][j] = 0
# else:
# product_top_names = data_neighbours.ix[product][1:10]
# product_top_sims = data_ibs.ix[product].sort_values(ascending=False)[1:10]
# user_purchases = data_germany.ix[user,product_top_names]
#
# data_sims.ix[i][j] = getScore(user_purchases,product_top_sims)
#
# # Get the top songs
# data_recommend = pd.DataFrame(index=data_sims.index, columns=['user','1','2','3','4','5','6'])
# data_recommend.ix[0:,0] = data_sims.ix[:,0]
#
# # Instead of top song scores, we want to see names
# for i in range(0,len(data_sims.index)):
# data_recommend.ix[i,1:] = data_sims.ix[i,:].sort_values(ascending=False).ix[1:7,].index.transpose()
#
# # Print a sample
# print data_recommend.ix[:, :6]
# print time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())