-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata_loader.py
65 lines (46 loc) · 1.86 KB
/
data_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import numpy as np
import os
def load_data():
    """Load the rating splits and the knowledge graph in one call.

    Returns:
        The 8-tuple ``(n_user, n_item, n_entity, n_relation, train_data,
        eval_data, test_data, kg)`` assembled from the results of
        ``load_rating()`` and ``load_kg()``.
    """
    # Ratings are read first, then the knowledge graph.
    rating_part = load_rating()
    (user_count, item_count, train_set, eval_set, test_set) = rating_part

    graph_part = load_kg()
    (entity_count, relation_count, graph) = graph_part

    print('data loaded.')
    return (
        user_count,
        item_count,
        entity_count,
        relation_count,
        train_set,
        eval_set,
        test_set,
        graph,
    )
def load_rating(rating_file='data/movie/ratings_final'):
    """Load the rating array and split it into train/eval/test sets.

    The ratings are read from ``rating_file + '.npy'`` when that cache file
    exists. Otherwise ``rating_file + '.txt'`` is parsed as int32 and the
    parsed array is saved to the ``.npy`` path for later runs.

    Args:
        rating_file: Path prefix (without extension) of the ratings file.
            Defaults to the previously hard-coded location.

    Returns:
        The tuple ``(n_user, n_item, train_data, eval_data, test_data)``.
        ``n_user`` and ``n_item`` are the numbers of distinct values in
        columns 0 and 1 of the rating array; the three arrays are whatever
        ``dataset_split`` returns for it.
    """
    print('reading rating file ...')

    cache_path = rating_file + '.npy'
    if os.path.exists(cache_path):
        rating_np = np.load(cache_path)
    else:
        # ndmin=2 keeps a one-line text file two-dimensional, so the column
        # slices below do not fail on a single rating.
        rating_np = np.loadtxt(rating_file + '.txt', dtype=np.int32, ndmin=2)
        np.save(cache_path, rating_np)

    # Count distinct ids inside NumPy rather than hashing every element
    # into a Python set.
    n_user = np.unique(rating_np[:, 0]).size
    n_item = np.unique(rating_np[:, 1]).size

    train_data, eval_data, test_data = dataset_split(rating_np)
    return n_user, n_item, train_data, eval_data, test_data
def dataset_split(rating_np, eval_ratio=0.2, test_ratio=0.2):
    """Randomly partition the rows of ``rating_np`` into train/eval/test.

    With the default ratios the split is train:eval:test = 6:2:2. Sampling
    uses the global ``np.random`` state, so callers can make the split
    reproducible with ``np.random.seed``.

    Args:
        rating_np: Array whose first axis indexes the ratings.
        eval_ratio: Fraction of rows placed in the evaluation set.
        test_ratio: Fraction of rows placed in the test set.

    Returns:
        The tuple ``(train_data, eval_data, test_data)``. The three arrays
        hold disjoint rows of ``rating_np``; the training rows keep their
        original relative order.

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more
            than 1.
    """
    print('splitting dataset ...')

    if eval_ratio < 0 or test_ratio < 0 or eval_ratio + test_ratio > 1:
        raise ValueError(
            'eval_ratio and test_ratio must be non-negative and sum to at most 1'
        )

    n_ratings = rating_np.shape[0]
    n_eval = int(n_ratings * eval_ratio)
    n_test = int(n_ratings * test_ratio)

    # np.random.choice accepts the population size directly, so the full
    # index list does not have to be materialised in Python.
    eval_indices = np.random.choice(n_ratings, size=n_eval, replace=False)

    # A boolean mask tracks the rows that are still unassigned. It replaces
    # Python-set arithmetic and yields the remaining indices in ascending
    # order regardless of hash-table layout.
    available = np.ones(n_ratings, dtype=bool)
    available[eval_indices] = False
    left = np.flatnonzero(available)
    test_indices = np.random.choice(left, size=n_test, replace=False)

    available[test_indices] = False
    train_indices = np.flatnonzero(available)

    train_data = rating_np[train_indices]
    eval_data = rating_np[eval_indices]
    test_data = rating_np[test_indices]
    return train_data, eval_data, test_data
def load_kg(kg_file='data/movie/kg_final'):
    """Load the knowledge-graph array and count its entities and relations.

    The array is read from ``kg_file + '.npy'`` when that cache file exists.
    Otherwise ``kg_file + '.txt'`` is parsed as int32 and the parsed array
    is saved to the ``.npy`` path for later runs.

    Args:
        kg_file: Path prefix (without extension) of the knowledge-graph
            file. Defaults to the previously hard-coded location.

    Returns:
        The tuple ``(n_entity, n_relation, kg)``. ``n_entity`` is the number
        of distinct values appearing in column 0 or column 2 of ``kg``, and
        ``n_relation`` is the number of distinct values in column 1.
    """
    print('reading KG file ...')

    cache_path = kg_file + '.npy'
    if os.path.exists(cache_path):
        kg = np.load(cache_path)
    else:
        # ndmin=2 keeps a one-line text file two-dimensional, so the column
        # slices below do not fail on a single row.
        kg = np.loadtxt(kg_file + '.txt', dtype=np.int32, ndmin=2)
        np.save(cache_path, kg)

    # Count distinct ids inside NumPy rather than hashing every element
    # into Python sets.
    n_entity = np.union1d(kg[:, 0], kg[:, 2]).size
    n_relation = np.unique(kg[:, 1]).size
    return n_entity, n_relation, kg