-
Notifications
You must be signed in to change notification settings - Fork 4
/
MyData.py
99 lines (78 loc) · 3.38 KB
/
MyData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import numpy as np
import scipy.sparse as sp
import pickle
import random
from collections import defaultdict
class MyData():
def __init__(self, trainMat, trustMat, seed, num_ng=0, is_training=None):
super(MyData, self).__init__()
self.setRandomSeed(seed)
self.trainMat = trainMat
self.trustMat = trustMat
self.userNum, self.itemNum = trainMat.shape
self.num_ng = num_ng
self.is_training = is_training
train_u, train_v = self.trainMat.nonzero()
train_r = self.trainMat.data
self.ratingClass = np.unique(train_r).size
assert np.sum(self.trainMat.data == 0) == 0
self.train_data = np.hstack(
(train_u.reshape(-1, 1), train_v.reshape(-1, 1), train_r.reshape(-1, 1)))
self.train_data = self.train_data.astype(np.int)
train_u1, train_u2 = self.trustMat.nonzero()
self.trust_data = np.hstack(
(train_u1.reshape(-1, 1), train_u2.reshape(-1, 1)))
self.trust_data = self.trust_data.astype(np.int)
def setRandomSeed(self, seed):
np.random.seed(seed)
random.seed(seed)
def neg_sample(self):
self.train_neg_sample()
self.trust_neg_sample()
def trust_neg_sample(self):
assert self.is_training
tmp_trustMat = self.trustMat.todok()
length = self.trust_data.shape[0]
trust_neg_data = np.random.randint(low=0, high=self.userNum, size=length)
self.trust_data_dict = defaultdict(list)
for i in range(length):
uid = self.trust_data[i][0]
neg_fid = trust_neg_data[i]
if (uid, neg_fid) in tmp_trustMat:
while (uid, neg_fid) in tmp_trustMat:
neg_fid = np.random.randint(low=0, high=self.userNum)
trust_neg_data[i] = neg_fid
self.trust_data_dict[uid].append([uid, self.trust_data[i][1], trust_neg_data[i]])
def train_neg_sample(self):
#'no need to sampling when testing'
assert self.is_training
self.train_data_dict = defaultdict(list)
length = self.trainMat.data.size
train_data = self.trainMat.data
train_neg_data = np.random.randint(low=1, high=self.ratingClass+1, size=length)
rebuild_idx = np.where(train_data == train_neg_data)[0]
for idx in rebuild_idx:
val = np.random.randint(1, self.ratingClass+1)
while val == train_data[idx]:
val = np.random.randint(1, self.ratingClass+1)
train_neg_data[idx] = val
assert np.sum(train_data == train_neg_data) == 0
for i in range(length):
uid = self.train_data[i][0]
iid = self.train_data[i][1]
rating = self.train_data[i][2]
neg_rating = train_neg_data[i]
self.train_data_dict[uid].append([uid, iid, rating, neg_rating])
def getTrainInstance(self, userIdxList):
ui_train = []
uu_train = []
for uidx in userIdxList:
ui_train += self.train_data_dict[uidx]
uu_train += self.trust_data_dict[uidx]
ui_train = np.array(ui_train)
idx = np.random.permutation(len(ui_train))
ui_train = ui_train[idx]
uu_train = np.array(uu_train)
idx = np.random.permutation(len(uu_train))
uu_train = uu_train[idx]
return ui_train, uu_train