data_loader_recsys.py
import numpy as np
from tensorflow.contrib import learn  # tensorflow.contrib is TF 1.x only; it was removed in TF 2.0


# This Data_Loader file is copied online
class Data_Loader:
    """Loads comma-separated item sequences and maps each item to an integer id."""

    def __init__(self, options):
        positive_data_file = options['dir_name']
        with open(positive_data_file, "r") as f:
            positive_examples = f.readlines()
        # The longest sequence (in items) determines the padded document length.
        max_document_length = max(len(x.split(",")) for x in positive_examples)
        vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
        # self.item: array of shape (num_sequences, max_document_length) holding item ids.
        self.item = np.array(list(vocab_processor.fit_transform(positive_examples)))
        # self.item_dict: token -> integer id mapping built by the vocabulary processor.
        self.item_dict = vocab_processor.vocabulary_._mapping

    def load_generator_data(self, sample_size):
        # Note: self.text and self.vocab_indexed are never set in __init__,
        # so this method only works if a caller assigns them beforehand.
        text = self.text
        mod_size = len(text) - len(text) % sample_size
        text = text[0:mod_size]
        text = text.reshape(-1, sample_size)
        return text, self.vocab_indexed

    def string_to_indices(self, sentence, vocab):
        # Map a comma-separated sequence of item tokens to their integer ids.
        indices = [vocab[s] for s in sentence.split(',')]
        return indices

    def inidices_to_string(self, sentence, vocab):
        # Invert the vocabulary and decode ids back to tokens, stopping at 'eol'.
        id_ch = {vocab[ch]: ch for ch in vocab}
        sent = []
        for c in sentence:
            if id_ch[c] == 'eol':
                break
            sent += id_ch[c]  # extends the list character by character
        return "".join(sent)  # tokens are concatenated without separators